In [None]:
import gc
import os
import re
import warnings
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from IPython.display import display
from peft import LoraConfig, get_peft_model
from sklearn.metrics import f1_score, hamming_loss, jaccard_score
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Silence every Python warning for the remainder of the run.
warnings.filterwarnings(action="ignore")

# Dataset language code; it is interpolated into the CSV file names,
# output directories and log messages further down.
lang = "fr"
# Directory expected to hold `train_<lang>.csv` and `test_<lang>.csv`.
base_path = "XED/processed"
# Hugging Face model identifier used for both the baseline and fine-tuning.
model_name = "Qwen/Qwen2.5-3B-Instruct"

# LoRA hyper-parameters forwarded to `LoraConfig`.
PARAMS = dict(
    r=8,
    lora_alpha=32,
    lora_dropout=0.10,
)

# Names of the sub-modules that receive LoRA adapters.
LORA_TARGET_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
    "gate_proj",
]


def predict_emotions_fr(
    model,
    tokenizer,
    df,
    max_samples=300,
    *,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=10,
) -> Tuple[List[str], List[str]]:
    """Generate an emotion prediction for the first rows of ``df``.

    Each row's ``text`` is wrapped in the French classification prompt, the
    model generates a short continuation, and only the newly generated
    tokens are decoded, stripped and lower-cased.

    Args:
        model: Causal LM exposing ``eval()``, ``device`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; its ``eos_token_id`` is
            used as the pad token id during generation.
        df: DataFrame with a ``text`` column and a ``labels`` column.
        max_samples: Maximum number of leading rows to evaluate.
        do_sample: Whether to sample during generation. The default keeps
            the original stochastic behaviour; pass ``False`` when the
            resulting metrics must be reproducible between runs.
        temperature: Sampling temperature; only forwarded to ``generate``
            when ``do_sample`` is true.
        max_new_tokens: Upper bound on generated tokens per example.

    Returns:
        ``(preds, golds)``: the decoded continuations and ``str(labels)``
        for every evaluated row, in row order.
    """
    preds: List[str] = []
    golds: List[str] = []
    subset = df.head(max_samples)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tokenizer.eos_token_id,
        "do_sample": do_sample,
    }
    if do_sample:
        # Temperature is a sampling-only setting, so it is left out when
        # sampling is disabled.
        generation_kwargs["temperature"] = temperature

    model.eval()
    for _, row in tqdm(subset.iterrows(), total=len(subset)):
        prompt = f"Classez l'émotion dans cette phrase : {row['text']}\nÉmotion :"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Drop `token_type_ids` (if the tokenizer emitted them) before the
        # encoding is unpacked into `generate`.
        inputs.pop("token_type_ids", None)

        with torch.no_grad():
            output = model.generate(**inputs, **generation_kwargs)

        # `generate` returns prompt + continuation; keep the continuation.
        prompt_length = inputs["input_ids"].shape[1]
        pred_text = tokenizer.decode(
            output[0][prompt_length:],
            skip_special_tokens=True,
        ).strip().lower()

        preds.append(pred_text)
        golds.append(str(row["labels"]))
    return preds, golds


def compute_metrics_numeric(preds, golds):
    """Score predictions against gold labels as an 8-class multi-label task.

    Gold entries are read as comma-separated label ids; predictions are
    scanned for stand-alone digits ``1``-``8``. Both are turned into binary
    indicator matrices with one column per label id before scoring.

    Args:
        preds: Predicted texts, one per example.
        golds: Gold label strings, one per example.

    Returns:
        Dict with ``micro_f1``, ``macro_f1``, ``jaccard`` (sample-averaged)
        and ``hamming`` scores.
    """
    label_names = [str(k) for k in range(1, 9)]
    column_of = {name: col for col, name in enumerate(label_names)}

    shape = (len(golds), len(label_names))
    y_true = np.zeros(shape)
    y_pred = np.zeros(shape)

    for row, (gold, pred) in enumerate(zip(golds, preds)):
        # Gold side: keep only numeric pieces that are known label ids.
        for piece in str(gold).split(","):
            token = piece.strip()
            if token.isdigit() and token in column_of:
                y_true[row, column_of[token]] = 1

        # Prediction side: every isolated digit 1-8 counts as a label.
        for token in re.findall(r'\b[1-8]\b', str(pred)):
            if token in column_of:
                y_pred[row, column_of[token]] = 1

    micro = f1_score(y_true, y_pred, average="micro", zero_division=0)
    macro = f1_score(y_true, y_pred, average="macro", zero_division=0)
    jaccard = jaccard_score(y_true, y_pred, average="samples", zero_division=0)
    hamming = hamming_loss(y_true, y_pred)

    return {
        "micro_f1": micro,
        "macro_f1": macro,
        "jaccard": jaccard,
        "hamming": hamming,
    }


def preprocess_function_fr(examples):
    """Tokenize a batch of examples as prompt-plus-label training texts.

    Args:
        examples: Batch mapping with parallel ``text`` and ``labels``
            sequences.

    Returns:
        The encoding produced by the module-level ``tokenizer``, truncated
        and padded to exactly 256 tokens per example.
    """
    prompts = []
    for sentence, label in zip(examples["text"], examples["labels"]):
        # The label directly follows the prompt so the model learns to
        # continue "Émotion :" with it.
        prompts.append(
            f"Classez l'émotion dans cette phrase : {sentence}\nÉmotion : {label}"
        )

    encoding = tokenizer(
        prompts,
        truncation=True,
        max_length=256,
        padding="max_length",
    )
    return encoding


# Load the train/test CSV splits for the configured language.
print(f"--- Loading French Data (lang: {lang}) ---")
try:
    train_df = pd.read_csv(os.path.join(base_path, f"train_{lang}.csv"))
    test_df = pd.read_csv(os.path.join(base_path, f"test_{lang}.csv"))
except FileNotFoundError:
    print(
        f"Error: Could not find data files for language '{lang}'. "
        f"Please ensure 'train_{lang}.csv' and 'test_{lang}.csv' "
        f"exist in '{base_path}'."
    )
    # `exit()` is an interactive helper installed by the `site` module and
    # reports success (status 0). Raising SystemExit works everywhere and
    # signals the failure with a non-zero status.
    raise SystemExit(1)
else:
    print(f"Train samples: {len(train_df)}")
    print(f"Test samples: {len(test_df)}")

# Tokenizer is configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
if tokenizer.pad_token is None:
    # No dedicated pad token: reuse EOS so `padding="max_length"` works.
    tokenizer.pad_token = tokenizer.eos_token

# Run on the GPU when one is visible, otherwise on the CPU.
device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

print("--- Tokenizing Data ---")
train_dataset, test_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, test_df)
)
# Replace the raw columns with the tokenizer output for each split.
tokenized_train, tokenized_test = (
    split.map(
        preprocess_function_fr,
        batched=True,
        remove_columns=split.column_names,
    )
    for split in (train_dataset, test_dataset)
)
# Collator configured for causal language modelling (mlm disabled).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Zero-shot baseline: evaluate the untouched model on the test split.
print(f"\n--- Running Zero-Shot Baseline Evaluation on {model_name} ---")
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else None
)
# No tokens are added to the tokenizer in this script (the pad token, if
# missing, reuses EOS), so the embedding matrix only needs resizing when it
# is too small. Resizing unconditionally could shrink the checkpoint's
# embedding matrix for no benefit.
if len(tokenizer) > base_model.get_input_embeddings().weight.shape[0]:
    base_model.resize_token_embeddings(len(tokenizer))
base_model.to(device)
base_model.config.pad_token_id = tokenizer.eos_token_id

base_preds, base_golds = predict_emotions_fr(
    base_model, tokenizer, test_df, max_samples=300
)
base_metrics = compute_metrics_numeric(base_preds, base_golds)
print(f"Base Model Metrics ({model_name}):")
print(base_metrics)

# Release the baseline model before the fine-tuning copy is loaded. Run the
# garbage collector first so objects kept alive only by reference cycles
# are freed before the CUDA cache is emptied.
del base_model
gc.collect()
if device.type == "cuda":
    torch.cuda.empty_cache()

# Load a fresh copy of the model, wrap it with LoRA adapters and train.
print(f"\n--- Setting up LoRA Fine-Tuning with {model_name} and Best Params ---")
base_model_ft = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else None
)
# No tokens are added to the tokenizer in this script, so only resize when
# the embedding matrix is too small for it.
# NOTE(review): a vocab size that differs from the base checkpoint is
# believed to make PEFT store the full embedding matrices in the adapter
# checkpoint -- confirm against the PEFT `save_embedding_layers` docs.
if len(tokenizer) > base_model_ft.get_input_embeddings().weight.shape[0]:
    base_model_ft.resize_token_embeddings(len(tokenizer))
base_model_ft.to(device)
base_model_ft.config.pad_token_id = tokenizer.eos_token_id

lora_config = LoraConfig(
    r=PARAMS["r"],
    lora_alpha=PARAMS["lora_alpha"],
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=PARAMS["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(base_model_ft, lora_config)
model.print_trainable_parameters()

training_args = TrainingArguments(
    output_dir=f"./{lang}_gpt_results/final_lora_qwen_{lang}_best",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=5,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir=f"./logs_qwen_{lang}_final",
    report_to="none",
    # `device` is CUDA exactly when CUDA is available, so one check is
    # enough to enable mixed precision on GPU only.
    # NOTE(review): the weights are loaded in bfloat16 on GPU while
    # training uses fp16 autocast; consider `bf16=True` on hardware that
    # supports it -- confirm before changing.
    fp16=device.type == "cuda",
)

# The test split doubles as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print(f"\n--- Starting LoRA Fine-Tuning with {model_name} (French) ---")
trainer.train()

# Score the fine-tuned model on the same test rows as the baseline.
print("\n--- Running Final Fine-Tuned Model Evaluation ---")
preds, golds = predict_emotions_fr(model, tokenizer, test_df, max_samples=300)
metrics = compute_metrics_numeric(preds, golds)
print(f"Fine-Tuned Model Metrics ({model_name} - Final):")
print(metrics)

# Persist the PEFT-wrapped model and the tokenizer side by side.
save_dir = f"./weights/qwen/lora_qwen_{lang}_best_final"
os.makedirs(save_dir, exist_ok=True)
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)
print(f"\nFinal fine-tuned model saved to {save_dir}")