In [None]:
import json
import logging
import os
from collections import defaultdict

import numpy as np
import optuna
import torch
# sklearn.metrics has no combined "f1_precision_recall_accuracy_score" symbol;
# import the four metric functions the notebook actually calls.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedGroupKFold, StratifiedKFold
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, MarianMTModel, MarianTokenizer

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [None]:
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by the training functions below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
# Emoji -> short Spanish gloss. Built once at definition time instead of being
# re-created on every call (the text replacement below runs once per message).
_EMOJI_TO_SPANISH = {
    "🔝": "arriba",
    "👎": "no me gusta",
    "😳": "sorprendido",
    "4️⃣": "cuatro",
    "🖐🏼": "mano abierta",
    "💎": "diamante",
    "🤣": "riendo fuerte",
    "🤞🏻": "dedos cruzados",
    "🍺": "cerveza",
    "❣": "corazón exclamación",
    "🤡": "payaso",
    "🎅🏻": "Papá Noel",
    "⬆": "subir",
    "💸": "dinero volando",
    "🤤": "babeando",
    "❌": "cruz",
    "🙌🏻": "manos arriba",
    "🤩": "asombrado",
    "🇵🇪": "Perú",
    "🤠": "vaquero",
    "🟣": "círculo morado",
    "🖐🏽": "mano abierta",
    "🙃": "cara invertida",
    "🐸": "rana",
    "👆🏼": "señalando arriba",
    "🈚": "gratis",
    "🌐": "mundo",
    "🎁": "regalo",
    "🎉": "celebración",
    "😵‍💫": "mareado",
    "🌝": "luna llena",
    "🙋‍♂": "hombre levantando mano",
    "3️⃣": "tres",
    "🔮": "bola de cristal",
    "😰": "nervioso",
    "😨": "miedo",
    "❓": "pregunta",
    "☝🏻": "dedo arriba",
    "🥲": "lágrimas de alegría",
    "✊🏼": "puño levantado",
    "✊": "puño",
    "🧘🏻‍♂": "meditación",
    "🧐": "curioso",
    "👏🏾": "aplausos",
    "🐳": "ballena",
    "💪🏼": "fuerza",
    "✅": "aprobado",
    "🤦🏼‍♂": "vergüenza",
    "😍": "enamorado",
    "👻": "fantasma",
    "😂": "riendo",
    "💪🏻": "fuerte",
    "🫤": "decepción",
    "⚽": "fútbol",
    "🥚": "huevo",
    "🙏": "rezando",
    "🤙": "llámame",
    "🙄": "aburrido",
    "😲": "asombro",
    "♥": "corazón",
    "🍎": "manzana",
    "🐻": "oso",
    "🤪": "loco",
    "👆🏽": "señalando arriba",
    "🎢": "montaña rusa",
    "🙌": "celebrando",
    "🌘": "luna menguante",
    "🫡": "saludo",
    "🙋🏻‍♀": "mujer levantando mano",
    "🤦‍♂": "error",
    "🌊": "ola",
    "😉": "guiño",
    "🥶": "frío",
    "💋": "beso",
    "🇺🇦": "Ucrania",
    "😶‍🌫": "confundido",
    "🌬": "viento",
    "💩": "mierda",
    "👌🏼": "perfecto",
    "🙆‍♂": "hombre OK",
    "💪🏽": "fuerza",
    "😱": "gritando",
    "1️⃣": "uno",
    "🤘": "rock",
    "👉": "señalando derecha",
    "🙂": "sonriendo",
    "👁": "ojo",
    "👀": "ojos",
    "🔥": "fuego",
    "⏺": "grabar",
    "😅": "sudando",
    "❗": "exclamación",
    "😕": "confuso",
    "🥒": "pepino",
    "🎂": "torta",
    "😥": "aliviado",
    "✌🏽": "victoria",
    "🎾": "tenis",
    "💚": "corazón verde",
    "💔": "corazón roto",
    "👍": "bien",
    "🐶": "perro",
    "✔": "verificado",
    "✌🏻": "paz",
    "💪": "músculo",
    "🎈": "globo",
    "🤑": "dinero en la cara",
    "😾": "gato enfadado",
    "💵": "billete",
    "👋🏻": "saludando",
    "👈🏻": "señalando izquierda",
    "💰": "bolsa de dinero",
    "🎼": "música",
    "🐮": "vaca",
    "🇦🇷": "Argentina",
    "🤷🏼‍♀": "mujer encogiéndose",
    "💃": "bailando",
    "🤮": "vomitando",
    "🇷🇺": "Rusia",
    "😎": "genial",
    "🥳": "fiesta",
    "⚰": "ataúd",
    "💯": "cien puntos",
    "📈": "gráfico subiendo",
    "😭": "llorando",
    "😪": "somnoliento",
    "🤞🏼": "suerte",
    "🤦🏽‍♂": "hombre avergonzado",
    "▶": "reproducir",
    "⛔": "prohibido",
    "🎶": "notas musicales",
    "🙊": "mono callado",
    "🌚": "luna nueva",
    "👏": "aplaudiendo",
    "🙏🏽": "rezando",
    "😄": "feliz",
    "🤦🏻‍♂": "error hombre",
    "🇨🇳": "China",
    "👌🏻": "OK",
    "🤙🏻": "llámame",
    "🇳🇬": "Nigeria",
    "😃": "alegre",
    "ℹ️": "información",
    "🗣": "hablando",
    "🙌🏼": "manos levantadas",
    "🤞": "cruzando dedos",
    "😜": "broma",
    "🎵": "nota musical",
    "🤟": "te amo",
    "✈": "avión",
    "👌🏽": "perfecto",
    "🤦🏽": "vergüenza",
    "👍🏾": "bien",
    "🔹": "diamante azul",
    "😝": "lengua fuera",
    "💶": "euro",
    "🤓": "nerd",
    "😶": "sin expresión",
    "🐁": "ratón",
    "🐗": "jabalí",
    "🤦🏻‍♀": "mujer avergonzada",
    "🍏": "manzana verde",
    "🟢": "círculo verde",
    "🙌🏽": "celebración",
    "🇪🇸": "España",
    "✨": "brillo",
    "🤷🏻‍♂": "hombre encogiéndose",
    "🚨": "alarma",
    "🥰": "amor",
    "☺": "sonrisa",
    "🤷‍♂": "duda",
    "🤯": "cabeza explotando",
    "🥺": "suplicando",
    "🐟": "pez",
    "🇮🇳": "India",
    "😐": "neutral",
    "😁": "sonriendo amplio",
    "🙋🏻‍♂": "levantando mano",
    "😓": "sudor",
    "🕺": "bailando",
    "😯": "sorprendido",
    "👉🏻": "señalando derecha",
    "💥": "explosión",
    "😢": "llorando",
    "🦖": "T-Rex",
    "⚡": "rayo",
    "😴": "durmiendo",
    "🫣": "espiando",
    "😻": "gato enamorado",
    "🥵": "caliente",
    "👍🏻": "pulgar arriba",
    "🇧🇾": "Bielorrusia",
    "🤷🏽‍♀": "mujer dudando",
    "😋": "saboreando",
    "🚫": "prohibido",
    "👅": "lengua",
    "😆": "riendo mucho",
    "😊": "sonriendo feliz",
    "😇": "ángel",
    "😠": "enojado",
    "🌎": "Américas",
    "⬇": "bajar",
    "😞": "triste",
    "🔵": "círculo azul",
    "📨": "correo",
    "👆": "arriba",
    "😘": "besando",
    "🌖": "luna gibosa",
    "❤": "corazón rojo",
    "☝": "dedo arriba",
    "✌": "victoria",
    "🍻": "brindis",
    "🤝": "apretón de manos",
    "👋": "saludo",
    "💲": "dólar",
    "👍🏼": "bien",
    "🚶🏻‍♂": "hombre caminando",
    "🤔": "pensando",
    "😹": "gato riendo",
    "🫵": "señalando",
    "🤭": "riendo callado",
    "🪂": "paracaídas",
    "😈": "diablo",
    "🔰": "principiante",
    "🫀": "corazón",
    "😒": "molesto",
    "🤷": "no sé",
    "😀": "felicidad",
    "🍀": "trébol",
    "🔪": "cuchillo",
    "😮": "boca abierta",
    "💬": "hablar",
    "✋": "mano levantada",
    "😌": "alivio",
    "💦": "sudor",
    "🤷🏼‍♂": "duda",
    "☹": "tristeza",
    "🤨": "sospecha",
    "🤙🏽": "llámame",
    "🔻": "triángulo abajo",
    "🛍": "compras",
    "🤧": "estornudo",
    "💫": "mareo",
    "👼": "ángel",
    "🤌": "pellizco",
    "💨": "rápido",
    "😛": "lengua fuera",
    "🎄": "árbol de Navidad",
    "🥹": "lágrimas contenidas",
    "☀": "sol",
    "🌕": "luna llena",
    "🇺🇸": "Estados Unidos",
    "👏🏼": "aplausos",
    "‼": "doble exclamación",
    "🚀": "cohete",
    "😡": "furioso",
    "😬": "nervios",
    "🔴": "círculo rojo",
    "🙏🏻": "orando",
    "🙈": "mono tapándose",
    "🦥": "perezoso",
    "🌙": "luna creciente",
    "👈": "señalando izquierda",
    "🐷": "cerdo",
    "🥸": "disfrazado",
    "😏": "sonrisa pícara",
    "😚": "beso cerrado",
    "⚓": "ancla",
    "👌": "OK",
    "🤟🏻": "te amo",
    "🌌": "vía láctea",
    "⚠": "advertencia",
    "🥱": "bostezando",
    "🐬": "delfín",
    "📊": "gráfico",
    "🐀": "rata",
    "🤗": "abrazo",
    "😔": "pensativo",
    "👏🏻": "aplaudiendo",
    "🇧🇬": "Bulgaria",
    "🥴": "mareado"
}

# Replacement order: longest keys first. Several keys are a base emoji followed
# by a skin-tone / ZWJ suffix (e.g. "👍🏻" starts with "👍"); replacing the base
# first would consume it and leave a dangling modifier, so the more specific
# sequence must be tried before its prefix. sorted() is stable, so keys of equal
# length keep their original relative order.
_EMOJI_REPLACEMENT_ORDER = sorted(
    _EMOJI_TO_SPANISH.items(), key=lambda item: len(item[0]), reverse=True
)


def map_emoji_to_spanish(emoji=None):
    """Look up the Spanish gloss for an emoji.

    Args:
        emoji: Emoji string to translate, or ``None`` to get the whole table.

    Returns:
        With ``emoji=None``: a new dict mapping every known emoji to its
        Spanish text (a copy, so callers may mutate it freely).
        Otherwise: the Spanish text for ``emoji``, or ``emoji`` itself when it
        is not in the table.
    """
    if emoji is None:
        return dict(_EMOJI_TO_SPANISH)
    return _EMOJI_TO_SPANISH.get(emoji, emoji)


def replace_emojis_in_text(text):
    """Replace every known emoji in ``text`` with its Spanish gloss.

    Each match is substituted by the gloss padded with one space on both sides;
    the final string is stripped of leading/trailing whitespace. Emoji that are
    not in the table are left unchanged.

    Args:
        text: Input string.

    Returns:
        The text with known emoji replaced.
    """
    result = text
    for emoji, spanish_text in _EMOJI_REPLACEMENT_ORDER:
        result = result.replace(emoji, f" {spanish_text} ")
    return result.strip()

def preprocess_data(data_dir):
    """Load every ``*.json`` file in ``data_dir`` and emoji-normalise its messages.

    Each file is parsed as JSON and iterated as a sequence of message objects;
    the ``"message"`` entry of each object is converted to ``str`` (``None`` or
    a missing entry becomes the empty string) and passed through
    ``replace_emojis_in_text``.

    Args:
        data_dir: Directory containing one JSON file per subject.

    Returns:
        defaultdict(list) mapping the subject nick (the file name up to its
        first ".") to the list of processed message strings.
    """
    subjects = defaultdict(list)
    # Filter first so the progress-bar total matches the files actually read,
    # and sort because os.listdir() returns entries in arbitrary order; a stable
    # subject order keeps downstream splits reproducible.
    json_files = sorted(name for name in os.listdir(data_dir) if name.endswith(".json"))
    for filename in tqdm(json_files, desc="Loading JSON Files"):
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(os.path.join(data_dir, filename), "r", encoding="utf-8") as f:
            messages = json.load(f)
        # NOTE(review): a nick containing "." is truncated here; kept as-is because
        # the label keys this must match are not visible in this notebook.
        nick = filename.split(".")[0]
        subjects[nick] = [
            replace_emojis_in_text("" if msg.get("message") is None else str(msg["message"]))
            for msg in messages
        ]
    return subjects


In [None]:
def _back_translate(text, stages, device):
    """Pass ``text`` through each (tokenizer, model) translation stage in order."""
    for tokenizer, model in stages:
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True, max_length=512
        ).to(device)
        outputs = model.generate(**inputs)
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text


def augment_data(subjects, labels, minority_class=3, save_path="/content/drive/MyDrive/data/task2/train/augmented_data.json"):
    """Duplicate minority-class subjects via es -> en -> fr -> es back-translation.

    For every subject whose label equals ``minority_class`` an extra subject
    named ``"<nick>_augment"`` is added, holding the back-translated version of
    each message (the original message is kept when translation fails or the
    message is blank). All original subjects are retained unchanged.

    The result is cached as JSON at ``save_path``. If that file already exists
    it is loaded and returned as-is: ``subjects``, ``labels`` and
    ``minority_class`` are NOT compared against the cache, so delete the file
    after changing the input data.

    Args:
        subjects: Mapping nick -> list of message strings.
        labels: Mapping nick -> class label.
        minority_class: Label value whose subjects are augmented.
        save_path: Location of the JSON cache.

    Returns:
        (final_subjects, final_labels): a defaultdict(list) nick -> messages and
        a dict nick -> label, both including the augmented subjects.
    """
    if os.path.exists(save_path):
        with open(save_path, "r", encoding="utf-8") as f:
            augmented_data = json.load(f)
        final_subjects = defaultdict(list)
        final_labels = {}
        for nick, data in augmented_data.items():
            final_subjects[nick] = data["messages"]
            final_labels[nick] = data["label"]
        return final_subjects, final_labels

    # Run the translation models on the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    stages = []
    for model_name in (
        "Helsinki-NLP/opus-mt-es-en",
        "Helsinki-NLP/opus-mt-en-fr",
        "Helsinki-NLP/opus-mt-fr-es",
    ):
        stages.append((
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name).to(device),
        ))

    final_subjects = defaultdict(list)
    # Labels are stored by nick directly rather than re-paired with the keys
    # afterwards, so an "<nick>_augment" name that already exists cannot shift
    # every following label by one.
    final_labels = {}
    total_messages_to_translate = sum(
        len(messages) for nick, messages in subjects.items() if labels[nick] == minority_class
    )
    failed = 0

    with tqdm(total=total_messages_to_translate, desc="Back Translation") as pbar:
        for nick, messages in subjects.items():
            label = labels[nick]
            final_subjects[nick] = messages
            final_labels[nick] = label
            if label != minority_class:
                # Not counted in the bar total, so do not advance the bar.
                continue

            augmented_messages = []
            for msg in messages:
                try:
                    if msg.strip():
                        augmented_messages.append(_back_translate(msg, stages, device))
                    else:
                        # Nothing to translate; keep blank messages as they are.
                        augmented_messages.append(msg)
                except Exception as e:
                    # Best effort: fall back to the original text, but make the
                    # failure visible instead of swallowing it silently.
                    failed += 1
                    logger.warning("Back-translation failed for a message of %s: %s", nick, e)
                    augmented_messages.append(msg)
                pbar.update(1)

            new_nick = f"{nick}_augment"
            final_subjects[new_nick] = augmented_messages
            final_labels[new_nick] = label

    if failed:
        logger.warning("%d message(s) could not be back-translated and were kept unchanged", failed)

    augmented_data = {
        nick: {"messages": messages, "label": final_labels[nick]}
        for nick, messages in final_subjects.items()
    }
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    # ensure_ascii=False writes raw non-ASCII text, so the encoding must be explicit.
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(augmented_data, f, ensure_ascii=False, indent=4)
    return final_subjects, final_labels

In [None]:
class AddictionDataset(torch.utils.data.Dataset):
    """Torch dataset with one example per subject.

    All messages of a subject are joined with single spaces into one text,
    which is tokenized on access and padded/truncated to ``max_length``.
    """

    def __init__(self, subjects, labels, tokenizer, max_length=256):
        """Store the tokenizer and build parallel lists of texts and labels.

        Args:
            subjects: Mapping nick -> list of message strings.
            labels: Mapping nick -> class label; must contain every nick in ``subjects``.
            tokenizer: Callable tokenizer applied to each text in ``__getitem__``.
            max_length: Padding/truncation length passed to the tokenizer.
        """
        self.subjects = subjects
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Iterate once so that texts[i] and labels[i] refer to the same subject.
        subject_labels = []
        subject_texts = []
        for nick, messages in subjects.items():
            subject_labels.append(labels[nick])
            subject_texts.append(' '.join(messages))
        self.labels = subject_labels
        self.texts = subject_texts

    def __len__(self):
        """Number of subjects."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize subject ``idx`` and return its model inputs and label tensor."""
        tokenizer_kwargs = dict(
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        encoded = self.tokenizer(self.texts[idx], **tokenizer_kwargs)
        item = {}
        # The tokenizer output carries a leading batch axis of size 1; flatten it away.
        for field in ('input_ids', 'attention_mask'):
            item[field] = encoded[field].flatten()
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

In [None]:
def compute_metrics(eval_pred):
    """Compute classification metrics from a ``(logits, labels)`` pair.

    Predictions are the argmax of ``logits`` over the last axis.

    Args:
        eval_pred: Two-element iterable ``(logits, labels)``.

    Returns:
        Dict with macro and weighted F1, macro precision, macro recall and accuracy.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # zero_division=0 scores a class with no predicted/true samples as 0 — the
    # same value the default produces — without emitting a warning on every
    # evaluation while the model still predicts a single class.
    return {
        "f1_macro": f1_score(labels, predictions, average='macro', zero_division=0),
        "f1_weighted": f1_score(labels, predictions, average='weighted', zero_division=0),
        "precision": precision_score(labels, predictions, average='macro', zero_division=0),
        "recall": recall_score(labels, predictions, average='macro', zero_division=0),
        "accuracy": accuracy_score(labels, predictions)
    }

In [None]:
def objective(trial):
    """Optuna objective: mean validation macro-F1 over 5 cross-validation folds.

    Samples learning rate, batch size, epoch count and weight decay from
    ``trial``, loads and augments the training subjects, then fine-tunes a fresh
    "PlanTL-GOB-ES/roberta-base-bne" 4-class classifier on each fold using a
    class-weighted cross-entropy loss.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        Mean of the per-fold ``eval_f1_macro`` values.
    """
    print(f"Starting Trial {trial.number}")

    # NOTE(review): 3e-4..5e-4 looks high for full fine-tuning of a RoBERTa-base
    # model — confirm the intended search range.
    learning_rate = trial.suggest_float("learning_rate", 3e-4, 5e-4, log=True)
    batch_size = trial.suggest_categorical("batch_size", [2, 4])
    num_epochs = trial.suggest_int("num_epochs", 4, 10)
    weight_decay = trial.suggest_float("weight_decay", 0.05, 0.1)
    print(f"Trial {trial.number} parameters: lr={learning_rate}, batch_size={batch_size}, "
          f"epochs={num_epochs}, weight_decay={weight_decay}")

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")
    if device == 'cuda':
        print(f"GPU memory allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")

    logger.info("Loading roberta-base-bne model...")
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained("PlanTL-GOB-ES/roberta-base-bne")
    print("Tokenizer loaded successfully")

    print("Loading data...")
    # NOTE(review): load_data is not defined in the visible notebook cells; it is
    # assumed to return (subjects, labels) keyed by nick — confirm it is defined
    # before this cell runs on a fresh kernel.
    subjects, labels = load_data('/content/drive/MyDrive/data/task2/train/subjects')
    print(f"Loaded {len(subjects)} subjects and {len(labels)} labels")
    subjects, labels = augment_data(subjects, labels)
    print(f"After augmentation: {len(subjects)} subjects and {len(labels)} labels")

    print("Setting up Stratified K-Fold Cross-Validation...")
    nicks = list(subjects.keys())
    # Look labels up by nick so stratification does not depend on the two dicts
    # sharing the same insertion order.
    targets = [labels[nick] for nick in nicks]
    # augment_data names each back-translated copy "<nick>_augment". Put a copy in
    # the same group as its source subject so the two never end up on opposite
    # sides of a train/validation split, which would leak near-duplicate text
    # into validation and inflate the score.
    augment_suffix = "_augment"
    groups = [
        nick[:-len(augment_suffix)] if nick.endswith(augment_suffix) else nick
        for nick in nicks
    ]
    skf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = []

    # NOTE(review): train_and_save_model uses weight 1.5 and dropout 0.2 for the
    # final training, whereas the search here uses 2 and 0.1 — confirm which
    # configuration is intended so tuning and final training agree.
    class_weights = torch.tensor([1.0, 1.0, 1.0, 2]).to(device)

    class WeightedTrainer(Trainer):
        """Trainer whose loss is cross-entropy weighted by ``class_weights``."""

        def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
            labels = inputs.pop("labels")
            outputs = model(**inputs)
            logits = outputs.logits
            loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
            loss = loss_fct(logits, labels)
            return (loss, outputs) if return_outputs else loss

    for fold, (train_idx, val_idx) in enumerate(skf.split(nicks, targets, groups)):
        print(f"\n=== Trial {trial.number} - Starting Fold {fold + 1}/{skf.n_splits} ===")

        train_nicks = [nicks[i] for i in train_idx]
        val_nicks = [nicks[i] for i in val_idx]
        train_subjects = {nick: subjects[nick] for nick in train_nicks}
        val_subjects = {nick: subjects[nick] for nick in val_nicks}
        train_labels = {nick: labels[nick] for nick in train_nicks}
        val_labels = {nick: labels[nick] for nick in val_nicks}
        print(f"Trial {trial.number} - Fold {fold + 1}: Training samples: {len(train_subjects)}, "
              f"Validation samples: {len(val_subjects)}")

        print(f"Trial {trial.number} - Fold {fold + 1}: Creating datasets...")
        train_dataset = AddictionDataset(train_subjects, train_labels, tokenizer)
        val_dataset = AddictionDataset(val_subjects, val_labels, tokenizer)
        print(f"Trial {trial.number} - Fold {fold + 1}: Train dataset size: {len(train_dataset)}")
        print(f"Trial {trial.number} - Fold {fold + 1}: Validation dataset size: {len(val_dataset)}")

        print(f"Trial {trial.number} - Fold {fold + 1}: Loading model...")
        model = AutoModelForSequenceClassification.from_pretrained(
            "PlanTL-GOB-ES/roberta-base-bne",
            num_labels=4,
            ignore_mismatched_sizes=True,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1
        )
        print(f"Trial {trial.number} - Fold {fold + 1}: Model loaded successfully")
        print(f"Trial {trial.number} - Fold {fold + 1}: Class weights: {class_weights}")

        print(f"Trial {trial.number} - Fold {fold + 1}: Setting up TrainingArguments...")
        training_args = TrainingArguments(
            output_dir=f'./results/trial_{trial.number}_fold_{fold + 1}',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            # The sampled learning rate must actually reach the trainer; otherwise
            # every trial silently trains with the library default and the value
            # reported in study.best_params was never evaluated.
            learning_rate=learning_rate,
            # NOTE(review): with a few hundred training subjects, 500 warmup steps
            # may cover most or all optimizer steps — consider warmup_ratio; confirm.
            warmup_steps=500,
            weight_decay=weight_decay,
            logging_dir=f'./logs/trial_{trial.number}_fold_{fold + 1}',
            logging_steps=10,
            eval_strategy="epoch",
            save_strategy="epoch",
            # Keep disk usage bounded across trials x folds x epochs; the best
            # checkpoint is still retained for load_best_model_at_end.
            save_total_limit=1,
            load_best_model_at_end=True,
            metric_for_best_model="f1_macro",
            greater_is_better=True,
            fp16=torch.cuda.is_available(),
        )
        print(f"Trial {trial.number} - Fold {fold + 1}: TrainingArguments configured: "
              f"epochs={num_epochs}, batch_size={batch_size}")

        def compute_and_log_metrics(eval_pred):
            # Reuse the module-level compute_metrics and additionally echo the
            # numbers for this trial/fold.
            metrics = compute_metrics(eval_pred)
            print(f"Trial {trial.number} - Fold {fold + 1} Evaluation Metrics: {metrics}")
            return metrics

        print(f"Trial {trial.number} - Fold {fold + 1}: Initializing WeightedTrainer...")
        trainer = WeightedTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_and_log_metrics
        )
        print(f"Trial {trial.number} - Fold {fold + 1}: WeightedTrainer initialized")

        logger.info(f"Trial {trial.number} - Fold {fold + 1}: Training model...")
        print(f"Trial {trial.number} - Fold {fold + 1}: Starting training...")
        try:
            trainer.train()
            print(f"Trial {trial.number} - Fold {fold + 1}: Training completed")
        except Exception as e:
            print(f"Trial {trial.number} - Fold {fold + 1}: Error during training: {str(e)}")
            raise

        logger.info(f"Trial {trial.number} - Fold {fold + 1}: Evaluating model...")
        print(f"Trial {trial.number} - Fold {fold + 1}: Evaluating model...")
        try:
            eval_results = trainer.evaluate()
            print(f"Trial {trial.number} - Fold {fold + 1} Results: {eval_results}")
            fold_scores.append(eval_results['eval_f1_macro'])
        except Exception as e:
            print(f"Trial {trial.number} - Fold {fold + 1}: Error during evaluation: {str(e)}")
            raise

        # Drop this fold's model before the next one is loaded so GPU memory
        # does not accumulate over folds and trials.
        del trainer, model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    avg_f1 = np.mean(fold_scores)
    print(f"Trial {trial.number} - Average F1 Macro across folds: {avg_f1:.4f}")
    return avg_f1

In [None]:
def train_and_save_model(data_dir, output_dir='./best_model', n_trials=20):
    """Tune hyperparameters with Optuna, then run 5-fold CV and keep the best fold model.

    After the search, the subjects in ``data_dir`` are loaded and augmented, a
    fresh "PlanTL-GOB-ES/roberta-base-bne" 4-class classifier is fine-tuned on
    each fold with the best hyperparameters, and the model of the fold with the
    highest validation macro-F1 is saved (with its tokenizer) to
    ``f"{output_dir}_best"``. Averaged fold metrics are printed and logged.

    Args:
        data_dir: Directory with the training subjects, passed to ``load_data``.
        output_dir: Prefix of the directory where the best model is written.
        n_trials: Number of Optuna trials to run.

    Returns:
        None.
    """
    logger.info("Optimizing hyperparameters with Optuna...")
    study = optuna.create_study(direction="maximize")
    with tqdm(total=n_trials, desc="Optuna Trials") as pbar:
        def callback(study, trial):
            pbar.update(1)
        # NOTE(review): objective() reads its own hard-coded data path, so the
        # search does not follow ``data_dir`` — confirm both point to the same data.
        study.optimize(objective, n_trials=n_trials, callbacks=[callback])

    best_params = study.best_params
    logger.info(f"Best hyperparameters: {best_params}")

    # NOTE(review): load_data is not defined in the visible notebook cells; it is
    # assumed to return (subjects, labels) keyed by nick — confirm.
    subjects, labels = load_data(data_dir)
    subjects, labels = augment_data(subjects, labels)

    logger.info("Loading final roberta-base-bne model...")
    tokenizer = AutoTokenizer.from_pretrained("PlanTL-GOB-ES/roberta-base-bne")

    nicks = list(subjects.keys())
    # Look labels up by nick so stratification does not depend on the two dicts
    # sharing the same insertion order.
    targets = [labels[nick] for nick in nicks]
    # augment_data names each back-translated copy "<nick>_augment"; group a copy
    # with its source subject so both always fall on the same side of a split
    # and near-duplicate text cannot leak into validation.
    augment_suffix = "_augment"
    groups = [
        nick[:-len(augment_suffix)] if nick.endswith(augment_suffix) else nick
        for nick in nicks
    ]
    skf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = []
    # Start below any attainable score so the first fold is always saved and the
    # "Best model saved" message below can never refer to a directory that was
    # not written.
    best_f1 = float("-inf")
    best_model_dir = f"{output_dir}_best"

    # NOTE(review): objective() tunes with class weight 2 and dropout 0.1, while
    # the final training here uses 1.5 and 0.2 plus gradient accumulation —
    # confirm which configuration is intended.
    class_weights = torch.tensor([1.0, 1.0, 1.0, 1.5])

    class WeightedTrainer(Trainer):
        """Trainer whose loss is cross-entropy weighted by ``class_weights``."""

        def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
            labels = inputs.pop("labels")
            outputs = model(**inputs)
            logits = outputs.logits
            # Move the weights to wherever the model produced its logits.
            loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(logits.device))
            loss = loss_fct(logits, labels)
            return (loss, outputs) if return_outputs else loss

    for fold, (train_idx, val_idx) in enumerate(skf.split(nicks, targets, groups)):
        print(f"\n=== Starting Fold {fold + 1}/{skf.n_splits} ===")

        train_nicks = [nicks[i] for i in train_idx]
        val_nicks = [nicks[i] for i in val_idx]
        train_subjects = {nick: subjects[nick] for nick in train_nicks}
        val_subjects = {nick: subjects[nick] for nick in val_nicks}
        train_labels = {nick: labels[nick] for nick in train_nicks}
        val_labels = {nick: labels[nick] for nick in val_nicks}

        print(f"Fold {fold + 1}: Training samples: {len(train_subjects)}, Validation samples: {len(val_subjects)}")

        train_dataset = AddictionDataset(train_subjects, train_labels, tokenizer)
        val_dataset = AddictionDataset(val_subjects, val_labels, tokenizer)

        model = AutoModelForSequenceClassification.from_pretrained(
            "PlanTL-GOB-ES/roberta-base-bne",
            num_labels=4,
            ignore_mismatched_sizes=True,
            hidden_dropout_prob=0.2,
            attention_probs_dropout_prob=0.2
        )

        training_args = TrainingArguments(
            output_dir=f'./results/fold_{fold + 1}',
            num_train_epochs=best_params['num_epochs'],
            per_device_train_batch_size=best_params['batch_size'],
            per_device_eval_batch_size=best_params['batch_size'],
            # NOTE(review): with a few hundred training subjects and gradient
            # accumulation, 500 warmup steps may exceed the total number of
            # optimizer steps — consider warmup_ratio; confirm.
            warmup_steps=500,
            weight_decay=best_params['weight_decay'],
            learning_rate=best_params['learning_rate'],
            logging_dir=f'./logs/fold_{fold + 1}',
            logging_steps=10,
            eval_strategy="epoch",
            save_strategy="epoch",
            # Keep disk usage bounded; the best checkpoint is still retained for
            # load_best_model_at_end.
            save_total_limit=1,
            load_best_model_at_end=True,
            metric_for_best_model="f1_macro",
            greater_is_better=True,
            gradient_accumulation_steps=2,
            fp16=torch.cuda.is_available(),
        )

        trainer = WeightedTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            # Module-level compute_metrics; no per-fold duplicate needed.
            compute_metrics=compute_metrics
        )

        print(f"Fold {fold + 1}: Starting training...")
        trainer.train()

        print(f"Fold {fold + 1}: Evaluating model...")
        eval_results = trainer.evaluate()
        fold_scores.append(eval_results)

        print(f"Fold {fold + 1} Results: {eval_results}")

        if eval_results['eval_f1_macro'] > best_f1:
            best_f1 = eval_results['eval_f1_macro']
            print(f"Fold {fold + 1}: New best model found with F1 macro = {best_f1}. Saving model...")
            trainer.save_model(best_model_dir)
            tokenizer.save_pretrained(best_model_dir)

        # Drop this fold's model before the next one is loaded so GPU memory
        # does not accumulate over folds.
        del trainer, model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    avg_metrics = {
        "f1_macro": np.mean([score['eval_f1_macro'] for score in fold_scores]),
        "f1_weighted": np.mean([score['eval_f1_weighted'] for score in fold_scores]),
        "precision": np.mean([score['eval_precision'] for score in fold_scores]),
        "recall": np.mean([score['eval_recall'] for score in fold_scores]),
        "accuracy": np.mean([score['eval_accuracy'] for score in fold_scores])
    }

    print("\n=== Cross-Validation Summary ===")
    print(f"Average F1 Macro: {avg_metrics['f1_macro']:.4f}")
    print(f"Average F1 Weighted: {avg_metrics['f1_weighted']:.4f}")
    print(f"Average Precision: {avg_metrics['precision']:.4f}")
    print(f"Average Recall: {avg_metrics['recall']:.4f}")
    print(f"Average Accuracy: {avg_metrics['accuracy']:.4f}")
    print(f"Best model saved to {best_model_dir}")

    logger.info(f"Cross-Validation completed. Average F1 Macro: {avg_metrics['f1_macro']:.4f}")
    logger.info(f"Best model saved to {best_model_dir}")

In [None]:
# Entry point: run the Optuna search followed by the final cross-validated
# training on the training-subjects directory.
if __name__ == "__main__":
    # NOTE(review): hard-coded path under /content/drive (presumably a mounted
    # Google Drive in Colab) — adjust when running elsewhere.
    data_dir = '/content/drive/MyDrive/data/task2/train/subjects'
    train_and_save_model(data_dir)

[I 2025-04-20 09:02:48,074] A new study created in memory with name: no-name-50c919f9-f454-49e9-9284-be58b56fb81b
Optuna Trials:   0%|          | 0/20 [00:00<?, ?it/s]

Starting Trial 0
Trial 0 parameters: lr=0.00031216163589308947, batch_size=4, epochs=7, weight_decay=0.060626921945855616
Using device: cuda
GPU memory allocated: 0.00 GB
Loading tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/851k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/509k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.21M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]

Tokenizer loaded successfully
Loading data...



Loading JSON Files:   0%|          | 0/350 [00:00<?, ?it/s][A
Loading JSON Files:   0%|          | 1/350 [00:00<05:16,  1.10it/s][A
Loading JSON Files:   1%|          | 2/350 [00:01<04:19,  1.34it/s][A
Loading JSON Files:   1%|          | 3/350 [00:02<04:02,  1.43it/s][A
Loading JSON Files:   1%|          | 4/350 [00:02<03:44,  1.54it/s][A
Loading JSON Files:   1%|▏         | 5/350 [00:05<07:18,  1.27s/it][A
Loading JSON Files:   2%|▏         | 6/350 [00:05<05:19,  1.08it/s][A
Loading JSON Files:   2%|▏         | 7/350 [00:05<04:05,  1.40it/s][A
Loading JSON Files:   5%|▍         | 16/350 [00:05<00:48,  6.82it/s][A
Loading JSON Files:   7%|▋         | 26/350 [00:05<00:22, 14.22it/s][A
Loading JSON Files:   9%|▉         | 33/350 [00:06<00:22, 14.34it/s][A
Loading JSON Files:  11%|█         | 37/350 [00:06<00:19, 15.90it/s][A
Loading JSON Files:  15%|█▍        | 52/350 [00:06<00:09, 31.01it/s][A
Loading JSON Files:  20%|█▉        | 69/350 [00:06<00:05, 49.72it/s][A
Loading

Loaded 350 subjects and 350 labels
After augmentation: 376 subjects and 376 labels
Setting up Stratified K-Fold Cross-Validation...

=== Trial 0 - Starting Fold 1/5 ===
Trial 0 - Fold 1: Training samples: 300, Validation samples: 76
Trial 0 - Fold 1: Creating datasets...
Trial 0 - Fold 1: Train dataset size: 300
Trial 0 - Fold 1: Validation dataset size: 76
Trial 0 - Fold 1: Loading model...


config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trial 0 - Fold 1: Model loaded successfully
Trial 0 - Fold 1: Class weights: tensor([1., 1., 1., 2.], device='cuda:0')
Trial 0 - Fold 1: Setting up TrainingArguments...
Trial 0 - Fold 1: TrainingArguments configured: epochs=7, batch_size=4
Trial 0 - Fold 1: Initializing WeightedTrainer...


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]



Trial 0 - Fold 1: WeightedTrainer initialized
Trial 0 - Fold 1: Starting training...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnguyenxuanphuc010205[0m ([33mnguyenxuanphuc010205-uit[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Weighted,Precision,Recall,Accuracy
1,1.3931,1.371103,0.131068,0.186254,0.088816,0.25,0.355263
2,1.2806,1.252229,0.131068,0.186254,0.088816,0.25,0.355263
3,1.0127,0.832638,0.63293,0.636363,0.590533,0.699495,0.723684
4,0.7229,0.596835,0.636044,0.634525,0.626736,0.679654,0.710526
5,0.3508,0.316733,0.911748,0.908978,0.918737,0.916271,0.907895
6,0.3716,0.166434,0.952881,0.948817,0.952381,0.960317,0.947368
7,0.72,0.061701,0.98814,0.986906,0.986111,0.990741,0.986842


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.13106796116504854, 'f1_weighted': 0.1862544711292795, 'precision': 0.08881578947368421, 'recall': 0.25, 'accuracy': 0.35526315789473684}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.13106796116504854, 'f1_weighted': 0.1862544711292795, 'precision': 0.08881578947368421, 'recall': 0.25, 'accuracy': 0.35526315789473684}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.6329297571178177, 'f1_weighted': 0.6363631782828553, 'precision': 0.5905330882352942, 'recall': 0.6994949494949495, 'accuracy': 0.7236842105263158}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.636043956043956, 'f1_weighted': 0.6345247734721419, 'precision': 0.6267361111111112, 'recall': 0.6796536796536796, 'accuracy': 0.7105263157894737}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.9117477134394427, 'f1_weighted': 0.908978058612012, 'precision': 0.9187370600414079, 'recall': 0.9162705486234898, 'accuracy': 0.9078947368421053}
Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.9528807671977648, 'f1_weighted': 0.9488170513121132, 'precision': 0.9523809523809523, 'recall': 0.9603174603174602, 'accuracy': 0.9473684210526315}
Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.9881401617250674, 'f1_weighted': 0.9869059441055469, 'precision': 0.9861111111111112, 'recall': 0.9907407407407407, 'accuracy': 0.9868421052631579}
Trial 0 - Fold 1: Training completed
Trial 0 - Fold 1: Evaluating model...


Trial 0 - Fold 1 Evaluation Metrics: {'f1_macro': 0.9881401617250674, 'f1_weighted': 0.9869059441055469, 'precision': 0.9861111111111112, 'recall': 0.9907407407407407, 'accuracy': 0.9868421052631579}
Trial 0 - Fold 1 Results: {'eval_loss': 0.06170148029923439, 'eval_f1_macro': 0.9881401617250674, 'eval_f1_weighted': 0.9869059441055469, 'eval_precision': 0.9861111111111112, 'eval_recall': 0.9907407407407407, 'eval_accuracy': 0.9868421052631579, 'eval_runtime': 0.5709, 'eval_samples_per_second': 133.116, 'eval_steps_per_second': 33.279, 'epoch': 7.0}

=== Trial 0 - Starting Fold 2/5 ===
Trial 0 - Fold 2: Training samples: 301, Validation samples: 75
Trial 0 - Fold 2: Creating datasets...
Trial 0 - Fold 2: Train dataset size: 301
Trial 0 - Fold 2: Validation dataset size: 75
Trial 0 - Fold 2: Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trial 0 - Fold 2: Model loaded successfully
Trial 0 - Fold 2: Class weights: tensor([1., 1., 1., 2.], device='cuda:0')
Trial 0 - Fold 2: Setting up TrainingArguments...
Trial 0 - Fold 2: TrainingArguments configured: epochs=7, batch_size=4
Trial 0 - Fold 2: Initializing WeightedTrainer...
Trial 0 - Fold 2: WeightedTrainer initialized
Trial 0 - Fold 2: Starting training...


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Weighted,Precision,Recall,Accuracy
1,1.3861,1.378841,0.392641,0.432984,0.404515,0.417102,0.48
2,1.3752,1.332018,0.132353,0.190588,0.09,0.25,0.36
3,1.1749,0.974654,0.641917,0.631028,0.637755,0.6875,0.706667
4,0.8795,0.561551,0.663095,0.663016,0.6375,0.714773,0.746667
5,0.3805,0.331394,0.905441,0.892778,0.90254,0.909586,0.893333
6,0.5993,0.502738,0.848414,0.837469,0.880853,0.861953,0.84
7,0.004,0.002613,1.0,1.0,1.0,1.0,1.0


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.39264112903225806, 'f1_weighted': 0.4329838709677419, 'precision': 0.4045145330859617, 'recall': 0.4171023965141612, 'accuracy': 0.48}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.1323529411764706, 'f1_weighted': 0.19058823529411767, 'precision': 0.09, 'recall': 0.25, 'accuracy': 0.36}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.6419172932330827, 'f1_weighted': 0.6310275689223058, 'precision': 0.6377551020408163, 'recall': 0.6875, 'accuracy': 0.7066666666666667}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.6630952380952381, 'f1_weighted': 0.663015873015873, 'precision': 0.6375, 'recall': 0.7147727272727272, 'accuracy': 0.7466666666666667}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.9054409005628518, 'f1_weighted': 0.8927779862414009, 'precision': 0.9025396825396825, 'recall': 0.9095860566448801, 'accuracy': 0.8933333333333333}
Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 0.8484143763213531, 'f1_weighted': 0.8374686398872445, 'precision': 0.8808528428093645, 'recall': 0.861952861952862, 'accuracy': 0.84}
Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 1.0, 'f1_weighted': 1.0, 'precision': 1.0, 'recall': 1.0, 'accuracy': 1.0}
Trial 0 - Fold 2: Training completed
Trial 0 - Fold 2: Evaluating model...


Trial 0 - Fold 2 Evaluation Metrics: {'f1_macro': 1.0, 'f1_weighted': 1.0, 'precision': 1.0, 'recall': 1.0, 'accuracy': 1.0}
Trial 0 - Fold 2 Results: {'eval_loss': 0.0026129449252039194, 'eval_f1_macro': 1.0, 'eval_f1_weighted': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_accuracy': 1.0, 'eval_runtime': 0.7125, 'eval_samples_per_second': 105.258, 'eval_steps_per_second': 26.665, 'epoch': 7.0}

=== Trial 0 - Starting Fold 3/5 ===
Trial 0 - Fold 3: Training samples: 301, Validation samples: 75
Trial 0 - Fold 3: Creating datasets...
Trial 0 - Fold 3: Train dataset size: 301
Trial 0 - Fold 3: Validation dataset size: 75
Trial 0 - Fold 3: Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trial 0 - Fold 3: Model loaded successfully
Trial 0 - Fold 3: Class weights: tensor([1., 1., 1., 2.], device='cuda:0')
Trial 0 - Fold 3: Setting up TrainingArguments...
Trial 0 - Fold 3: TrainingArguments configured: epochs=7, batch_size=4
Trial 0 - Fold 3: Initializing WeightedTrainer...
Trial 0 - Fold 3: WeightedTrainer initialized
Trial 0 - Fold 3: Starting training...


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Weighted,Precision,Recall,Accuracy
1,1.3846,1.377472,0.307971,0.354589,0.325,0.373094,0.466667
2,1.2503,1.244691,0.132353,0.190588,0.09,0.25,0.36
3,1.2406,1.228182,0.531538,0.62839,0.597579,0.562092,0.666667
4,0.7326,0.455827,0.887559,0.894766,0.877182,0.907096,0.893333
5,0.4415,0.253258,0.892257,0.91588,0.933684,0.881481,0.92
6,0.3188,0.09846,0.98814,0.986731,0.986111,0.990741,0.986667
7,0.0053,0.045383,0.98814,0.986731,0.986111,0.990741,0.986667


Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.3079710144927536, 'f1_weighted': 0.3545893719806763, 'precision': 0.325, 'recall': 0.3730936819172113, 'accuracy': 0.4666666666666667}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.1323529411764706, 'f1_weighted': 0.19058823529411767, 'precision': 0.09, 'recall': 0.25, 'accuracy': 0.36}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.5315384615384615, 'f1_weighted': 0.6283897435897436, 'precision': 0.5975790513833992, 'recall': 0.5620915032679739, 'accuracy': 0.6666666666666666}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.8875593715747169, 'f1_weighted': 0.8947655583972719, 'precision': 0.8771816418875242, 'recall': 0.9070961718020542, 'accuracy': 0.8933333333333333}
Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.8922565960609439, 'f1_weighted': 0.9158800941409637, 'precision': 0.9336842105263158, 'recall': 0.8814814814814815, 'accuracy': 0.92}
Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.9881401617250674, 'f1_weighted': 0.9867313566936209, 'precision': 0.9861111111111112, 'recall': 0.9907407407407407, 'accuracy': 0.9866666666666667}
Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.9881401617250674, 'f1_weighted': 0.9867313566936209, 'precision': 0.9861111111111112, 'recall': 0.9907407407407407, 'accuracy': 0.9866666666666667}
Trial 0 - Fold 3: Training completed
Trial 0 - Fold 3: Evaluating model...


Trial 0 - Fold 3 Evaluation Metrics: {'f1_macro': 0.9881401617250674, 'f1_weighted': 0.9867313566936209, 'precision': 0.9861111111111112, 'recall': 0.9907407407407407, 'accuracy': 0.9866666666666667}
Trial 0 - Fold 3 Results: {'eval_loss': 0.09845957905054092, 'eval_f1_macro': 0.9881401617250674, 'eval_f1_weighted': 0.9867313566936209, 'eval_precision': 0.9861111111111112, 'eval_recall': 0.9907407407407407, 'eval_accuracy': 0.9866666666666667, 'eval_runtime': 0.6798, 'eval_samples_per_second': 110.321, 'eval_steps_per_second': 27.948, 'epoch': 7.0}

=== Trial 0 - Starting Fold 4/5 ===
Trial 0 - Fold 4: Training samples: 301, Validation samples: 75
Trial 0 - Fold 4: Creating datasets...
Trial 0 - Fold 4: Train dataset size: 301
Trial 0 - Fold 4: Validation dataset size: 75
Trial 0 - Fold 4: Loading model...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trial 0 - Fold 4: Model loaded successfully
Trial 0 - Fold 4: Class weights: tensor([1., 1., 1., 2.], device='cuda:0')
Trial 0 - Fold 4: Setting up TrainingArguments...
Trial 0 - Fold 4: TrainingArguments configured: epochs=7, batch_size=4
Trial 0 - Fold 4: Initializing WeightedTrainer...
Trial 0 - Fold 4: WeightedTrainer initialized
Trial 0 - Fold 4: Starting training...


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Weighted,Precision,Recall,Accuracy
1,1.38,1.378431,0.244417,0.290422,0.257576,0.310458,0.4
2,1.276,1.244831,0.132353,0.190588,0.09,0.25,0.36
3,1.0584,0.97456,0.548509,0.59313,0.591749,0.6,0.693333
4,0.7829,0.493545,0.943677,0.945588,0.963636,0.930882,0.946667
5,0.3793,0.21088,0.924656,0.934631,0.915992,0.945767,0.933333


Trial 0 - Fold 4 Evaluation Metrics: {'f1_macro': 0.24441687344913152, 'f1_weighted': 0.2904218362282879, 'precision': 0.25757575757575757, 'recall': 0.3104575163398693, 'accuracy': 0.4}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 4 Evaluation Metrics: {'f1_macro': 0.1323529411764706, 'f1_weighted': 0.19058823529411767, 'precision': 0.09, 'recall': 0.25, 'accuracy': 0.36}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 4 Evaluation Metrics: {'f1_macro': 0.5485093167701863, 'f1_weighted': 0.5931296066252588, 'precision': 0.5917487684729064, 'recall': 0.6, 'accuracy': 0.6933333333333334}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Trial 0 - Fold 4 Evaluation Metrics: {'f1_macro': 0.9436767086508469, 'f1_weighted': 0.9455879759413536, 'precision': 0.9636363636363636, 'recall': 0.9308823529411764, 'accuracy': 0.9466666666666667}
Trial 0 - Fold 4 Evaluation Metrics: {'f1_macro': 0.9246562616127834, 'f1_weighted': 0.9346314876749661, 'precision': 0.9159919028340081, 'recall': 0.9457671957671958, 'accuracy': 0.9333333333333333}


[W 2025-04-20 09:17:05,959] Trial 0 failed with parameters: {'learning_rate': 0.00031216163589308947, 'batch_size': 4, 'num_epochs': 7, 'weight_decay': 0.060626921945855616} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<ipython-input-13-d39d4cd5772d>", line 129, in objective
    trainer.train()
  File "/usr/local/lib/python3.11/dist-packages/transformers/trainer.py", line 2245, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/transformers/trainer.py", line 2611, in _inner_training_loop
    self.optimizer.step()
  File "/usr/local/lib/python3.11/dist-packages/accelerate/optimizer.py", line 165, in step
    self.scaler.step(self.optimizer, closure)
  File "/usr/local/lib/python3.11/dist-packages/torch/

KeyboardInterrupt: 

In [None]:
def predict_addiction(json_file_path, model_path='./best_model_best'):
    """Predict the addiction category for the messages stored in a JSON file.

    The file must contain an iterable of objects, each exposing a "message"
    field. Every message is passed through ``replace_emojis_in_text``, the
    results are joined with single spaces, and the combined text (truncated
    to 256 tokens) is classified by the sequence-classification model loaded
    from ``model_path``.

    Args:
        json_file_path: Path to the UTF-8 encoded JSON file with the messages.
        model_path: Directory or checkpoint to load the classifier from.

    Returns:
        One of "betting", "onlinegaming", "trading" or "lootboxes".

    Raises:
        OSError: If ``json_file_path`` or the model files cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Index -> label mapping; its size also fixes the classifier head width,
    # so the two can never drift apart.
    addiction_types = {0: "betting", 1: "onlinegaming", 2: "trading", 3: "lootboxes"}

    tokenizer = AutoTokenizer.from_pretrained("PlanTL-GOB-ES/roberta-base-bne")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, num_labels=len(addiction_types)
    )
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    model.eval()

    # Read explicitly as UTF-8: the messages contain accented characters and
    # emojis, and the platform default encoding is not guaranteed to be UTF-8.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        messages = json.load(f)

    # A missing or null "message" field is treated as empty text.
    processed_messages = []
    for msg in messages:
        raw_text = msg.get("message")
        processed_messages.append(
            replace_emojis_in_text(str(raw_text) if raw_text is not None else "")
        )
    combined_text = ' '.join(processed_messages)

    encoding = tokenizer(
        combined_text,
        add_special_tokens=True,
        max_length=256,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=-1).item()

    return addiction_types[predicted_class]

In [None]:
if __name__ == "__main__":
    # Step 1: run the training entry point on the task-2 training subjects.
    data_dir = '/content/drive/MyDrive/data/task2/train/subjects'
    train_and_save_model(data_dir)

    # Step 2: classify one subject file with a specific saved checkpoint.
    # NOTE(review): the JSON path is a placeholder and the checkpoint path is
    # hard-coded; both must exist on disk for the prediction to succeed.
    json_file_path = 'path_to_your_json_file.json'
    model_path = './results/trial_0_fold_3/checkpoint-380'

    try:
        addiction = predict_addiction(json_file_path, model_path=model_path)
        # Message reads "Addiction type: <label>".
        print(f"Loại nghiện: {addiction}")
    except Exception as exc:
        # Best-effort demo run: report the failure ("Error: ...") instead of
        # raising.
        print(f"Lỗi: {exc!s}")