# In [None]:  (Jupyter cell marker left over from the notebook export)
import gc
import os
import re
import warnings
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from IPython.display import display
from peft import LoraConfig, get_peft_model
from sklearn.metrics import f1_score, hamming_loss, jaccard_score
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Suppress every warning category for the rest of the run.
warnings.filterwarnings("ignore")

# --- Data and model selection ---
LANGUAGE = "es"
BASE_PATH = "XED/processed"
MODEL_NAME = "tiiuae/Falcon3-3B-Instruct"

# --- Output locations (checkpoints, logs, final adapter weights) ---
OUTPUT_DIR_ROOT = f"./{LANGUAGE}_gpt_results"
LOG_DIR_ROOT = f"./logs_falcon_{LANGUAGE}"
WEIGHTS_DIR_ROOT = "./weights/falcon"
SAVE_DIR_FINAL = os.path.join(
    WEIGHTS_DIR_ROOT,
    f"lora_falcon_{LANGUAGE}_best_final",
)

# --- LoRA hyper-parameters, read when the LoraConfig is built ---
PARAMS = dict(r=8, lora_alpha=32, lora_dropout=0.10)
LORA_TARGET_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
    "gate_proj",
]

# --- Label space and run settings ---
# Label ids "1" through "8", kept as strings.
ALL_LABELS = list(map(str, range(1, 9)))
MAX_INPUT_LENGTH = 256
EVAL_SAMPLES_COUNT_CV = 300
LEARNING_RATE = 2e-4


def predict_emotions_es(
    model: torch.nn.Module,
    tokenizer: AutoTokenizer,
    df: pd.DataFrame,
    max_samples: Optional[int] = 300
) -> Tuple[List[str], List[str]]:
    """Run the Spanish classification prompt over ``df`` and collect outputs.

    Args:
        model: Causal LM used for generation; it is switched to eval mode here
            and inputs are moved to ``model.device``.
        tokenizer: Encodes each prompt and decodes the generated continuation.
        df: Frame read through its ``text`` and ``labels`` columns.
        max_samples: Only the first ``max_samples`` rows are processed;
            ``None`` processes every row.

    Returns:
        A ``(preds, golds)`` pair of equal length: the stripped, lower-cased
        generated continuation per row, and ``str(row["labels"])`` per row.
    """

    subset = df if max_samples is None else df.head(max_samples)

    model.eval()
    target_device = model.device

    generated: List[str] = []
    references: List[str] = []

    progress = tqdm(
        subset.iterrows(),
        total=len(subset),
        desc="Generating Predictions"
    )
    for _, record in progress:
        sentence = record["text"]
        prompt = f"Clasifica la emoción en esta frase: {sentence}\nEmoción:"
        encoded = tokenizer(prompt, return_tensors="pt").to(target_device)

        # Drop token_type_ids, when the tokenizer emits them, before the
        # encoding is unpacked into generate().
        if "token_type_ids" in encoded:
            encoded.pop("token_type_ids")

        # Sampling is enabled, so repeated runs can produce different
        # predictions for the same row.
        with torch.no_grad():
            sequences = model.generate(
                **encoded,
                max_new_tokens=10,
                pad_token_id=tokenizer.eos_token_id,
                temperature=0.7,
                do_sample=True,
            )

        # Keep only the tokens produced after the prompt.
        prompt_len = encoded['input_ids'].shape[1]
        continuation = sequences[0][prompt_len:]
        decoded = tokenizer.decode(continuation, skip_special_tokens=True)

        generated.append(decoded.strip().lower())
        references.append(str(record["labels"]))

    return generated, references


def compute_metrics_numeric(
    preds: List[str], golds: List[str]
) -> Dict[str, float]:
    """Score multilabel predictions against gold labels.

    Both inputs are turned into binary indicator matrices with one row per
    entry of ``golds`` and one column per entry of ``ALL_LABELS``.

    Args:
        preds: Predicted strings; every standalone digit 1-8 found in a
            string counts as a predicted label.
        golds: Gold strings; each is split on commas and the all-digit
            pieces that appear in ``ALL_LABELS`` count as true labels.

    Returns:
        A dict with keys ``micro_f1``, ``macro_f1``, ``jaccard``
        (sample-averaged) and ``hamming``.
    """

    n_rows, n_labels = len(golds), len(ALL_LABELS)
    y_true = np.zeros((n_rows, n_labels))
    y_pred = np.zeros((n_rows, n_labels))

    # zip() stops at the shorter list; rows beyond that stay all-zero in
    # both matrices.
    for row, (gold, pred) in enumerate(zip(golds, preds)):
        gold_pieces = (piece.strip() for piece in str(gold).split(","))
        gold_ids = [piece for piece in gold_pieces if piece.isdigit()]
        pred_ids = re.findall(r'\b[1-8]\b', str(pred))

        for matrix, label_ids in ((y_true, gold_ids), (y_pred, pred_ids)):
            for label in label_ids:
                if label in ALL_LABELS:
                    matrix[row, ALL_LABELS.index(label)] = 1

    micro_f1 = f1_score(y_true, y_pred, average="micro", zero_division=0)
    macro_f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
    jaccard = jaccard_score(
        y_true, y_pred, average="samples", zero_division=0
    )
    hamming = hamming_loss(y_true, y_pred)

    return {
        "micro_f1": micro_f1,
        "macro_f1": macro_f1,
        "jaccard": jaccard,
        "hamming": hamming,
    }


def preprocess_function_es(
    examples: Dict[str, List[Any]], tokenizer: AutoTokenizer
) -> Dict[str, Any]:
    """Create training samples that pair the instruction with the label.

    Args:
        examples: Batch with parallel ``text`` and ``labels`` lists.
        tokenizer: Callable tokenizer applied to the assembled strings.

    Returns:
        The tokenizer output for the batch, truncated and padded to
        ``MAX_INPUT_LENGTH`` tokens.
    """

    prompts = []
    for sentence, label in zip(examples["text"], examples["labels"]):
        # Same prompt used at prediction time, with the gold label appended.
        prompts.append(
            f"Clasifica la emoción en esta frase: {sentence}\nEmoción: {label}"
        )

    encoding = tokenizer(
        prompts,
        truncation=True,
        max_length=MAX_INPUT_LENGTH,
        padding="max_length"
    )
    return encoding


def _load_base_model(
    tokenizer: AutoTokenizer, device: torch.device
) -> torch.nn.Module:
    """Load ``MODEL_NAME`` as a causal LM, sized for ``tokenizer``, on ``device``."""
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else None
    )
    model.resize_token_embeddings(len(tokenizer))
    model.to(device)
    model.config.pad_token_id = tokenizer.eos_token_id
    return model


def _tokenize_frame(
    frame: pd.DataFrame, tokenizer: AutoTokenizer, num_proc: int
) -> Dataset:
    """Turn ``frame`` into a Dataset holding only the tokenized prompt columns."""
    dataset = Dataset.from_pandas(frame)
    return dataset.map(
        preprocess_function_es,
        fn_kwargs={"tokenizer": tokenizer},
        batched=True,
        remove_columns=dataset.column_names,
        num_proc=num_proc
    )


def _release_memory(device: torch.device) -> None:
    """Collect garbage first, then release cached CUDA memory if on GPU."""
    # Collecting before emptying the cache lets memory held only by
    # unreachable objects be released as well.
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()


def main():
    """Executes the LoRA fine-tuning and evaluation pipeline.

    Steps, in order:
      1. Read ``train_{LANGUAGE}.csv`` / ``test_{LANGUAGE}.csv`` from
         ``BASE_PATH``; print an error and return if either file is missing.
      2. Tokenize both splits with ``preprocess_function_es``.
      3. Evaluate the untouched base model on the first
         ``EVAL_SAMPLES_COUNT_CV`` test rows and print the metrics.
      4. Reload the model, wrap it with LoRA adapters and train it.
      5. Evaluate the fine-tuned model the same way, print the metrics, and
         save the adapter and tokenizer to ``SAVE_DIR_FINAL``.
    """

    print(f"--- Loading Spanish Data (lang: {LANGUAGE}) ---")
    # Only the reads can raise FileNotFoundError, so only they are guarded.
    try:
        train_df = pd.read_csv(os.path.join(BASE_PATH, f"train_{LANGUAGE}.csv"))
        test_df = pd.read_csv(os.path.join(BASE_PATH, f"test_{LANGUAGE}.csv"))
    except FileNotFoundError:
        print(
            f"Error: Could not find data files for language '{LANGUAGE}'. Exiting."
        )
        return
    print(f"Train samples: {len(train_df)}")
    print(f"Test samples: {len(test_df)}")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side='left')
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Create the weights directory up front so an unwritable location fails
    # before any model work starts.
    os.makedirs(os.path.dirname(SAVE_DIR_FINAL), exist_ok=True)

    print("--- Tokenizing Data ---")
    num_proc = os.cpu_count() or 1
    tokenized_train = _tokenize_frame(train_df, tokenizer, num_proc)
    tokenized_test = _tokenize_frame(test_df, tokenizer, num_proc)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    print(f"\n--- Running Zero-Shot Baseline Evaluation on {MODEL_NAME} ---")

    base_model = _load_base_model(tokenizer, device)

    base_preds, base_golds = predict_emotions_es(
        base_model, tokenizer, test_df, max_samples=EVAL_SAMPLES_COUNT_CV
    )
    base_metrics = compute_metrics_numeric(base_preds, base_golds)
    print(f"Base Model Metrics ({MODEL_NAME}):")
    print(base_metrics)

    # Drop the baseline model before loading the copy that will be trained.
    del base_model
    _release_memory(device)

    print(f"\n--- Setting up LoRA Fine-Tuning with {MODEL_NAME} ---")

    base_model_ft = _load_base_model(tokenizer, device)

    lora_config = LoraConfig(
        r=PARAMS["r"],
        lora_alpha=PARAMS["lora_alpha"],
        target_modules=LORA_TARGET_MODULES,
        lora_dropout=PARAMS["lora_dropout"],
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(base_model_ft, lora_config)
    model.print_trainable_parameters()

    output_dir = os.path.join(
        OUTPUT_DIR_ROOT, f"final_lora_falcon_{LANGUAGE}_best"
    )
    logging_dir = os.path.join(LOG_DIR_ROOT, "final")
    # NOTE(review): on CUDA the weights are loaded as bfloat16 while training
    # requests fp16 mixed precision -- confirm this mix is intended; bf16=True
    # may be the better match on hardware that supports it.
    training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        learning_rate=LEARNING_RATE,
        num_train_epochs=5,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_dir=logging_dir,
        report_to="none",
        fp16=device.type == "cuda"
    )

    # NOTE(review): the per-epoch evaluation runs on the test split; there is
    # no separate validation split in this pipeline.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    print(f"\n--- Starting LoRA Fine-Tuning with {MODEL_NAME} (Spanish) ---")
    trainer.train()

    print("\n--- Running Final Fine-Tuned Model Evaluation ---")

    preds, golds = predict_emotions_es(
        model, tokenizer, test_df, max_samples=EVAL_SAMPLES_COUNT_CV
    )
    metrics = compute_metrics_numeric(preds, golds)
    print(f"Fine-Tuned Model Metrics ({MODEL_NAME} - Final):")
    print(metrics)

    os.makedirs(os.path.dirname(SAVE_DIR_FINAL), exist_ok=True)
    model.save_pretrained(SAVE_DIR_FINAL)
    tokenizer.save_pretrained(SAVE_DIR_FINAL)
    print(f"\nFinal fine-tuned model saved to {SAVE_DIR_FINAL}")

    del model, base_model_ft, trainer
    _release_memory(device)


# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()