In [None]:
# Core packages
!pip install pandas numpy scikit-learn emoji

# PyTorch (Colab usually has it, but just in case)
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Hugging Face transformers
!pip install transformers

# For progress bars (optional but helpful)
!pip install tqdm

# For better model performance tracking (optional)
!pip install wandb

# If you want to use smaller models (optional)
!pip install sentencepiece

# For data visualization (optional)
!pip install matplotlib seaborn

# MLOps: experiment tracking and model registry
!pip install mlflow

Collecting emoji
  Downloading emoji-2.15.0-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.15.0-py3-none-any.whl (608 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m608.4/608.4 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.15.0
Looking in indexes: https://download.pytorch.org/whl/cu118


In [None]:
# ============================================================
# 0) Imports
# ============================================================
import pandas as pd
import numpy as np
from collections import Counter
import math
import emoji
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_recall_fscore_support,
)
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

from transformers import (
    AutoTokenizer,
    AutoModel,
    get_cosine_schedule_with_warmup,
    get_linear_schedule_with_warmup,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ============================================================
# MLflow: init tracking and experiment
# ============================================================
import mlflow
from mlflow_config import init_mlflow
from mlflow_utils import log_pytorch_model, log_label_encoder, log_training_config
init_mlflow()

# ============================================================
# 1) Load + normalize dataframe
# ============================================================
CSV_PATH = "balanced_emotion_dataset_smart.csv"

df = pd.read_csv(CSV_PATH)
df["text"] = df["text"].astype(str).str.strip().replace(r"\s+", " ", regex=True)

valid_emotions = ["joy", "sadness", "anger", "fear", "love", "surprise"]

def norm_label(e):
    e = str(e).strip().lower()
    if e in valid_emotions:
        return e
    if e == "happy":
        return "joy"
    if e in ["mad", "furious", "rage"]:
        return "anger"
    return None

df["hidden_emotion_label"] = df["hidden_emotion_label"].apply(norm_label)
df = df[df["hidden_emotion_label"].notna()].reset_index(drop=True)

df["hidden_flag_id"] = df["hidden_emotion_flag"].astype(int)

le = LabelEncoder()
df["emotion_id"] = le.fit_transform(df["hidden_emotion_label"])
print("Label order:", list(le.classes_))
print("\nClass distribution:")
print(df["hidden_emotion_label"].value_counts())
print(f"\nTotal samples: {len(df)}")

# Simple primary-emoji extraction
if "primary_emoji" not in df.columns:
    def first_emoji(s):
        s = str(s)
        for ch in s:
            if ch in emoji.EMOJI_DATA:
                return ch
        return ""
    df["primary_emoji"] = df["text"].apply(first_emoji)

# ============================================================
# 2) Train/val split with stratification
# ============================================================
X_train, X_val, y_train_em, y_val_em, y_train_hid, y_val_hid = train_test_split(
    df["text"],
    df["emotion_id"],
    df["hidden_flag_id"],
    test_size=0.2,
    random_state=42,
    stratify=df["emotion_id"],
)

train_emojis = df.loc[X_train.index, "primary_emoji"]
val_emojis   = df.loc[X_val.index, "primary_emoji"]

print(f"\nTraining samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")

# ============================================================
# 3) Enhanced preprocessing with text augmentation
# ============================================================
negative_keywords = [
    "hate", "angry", "mad", "furious", "sad", "depressed", "terrible",
    "cry", "crying", "die", "dead", "kill", "killing", "awful",
    "annoying", "stupid", "idiot", "worst", "bad", "horrible"
]

positive_keywords = [
    "love", "happy", "joy", "great", "wonderful", "amazing",
    "excellent", "perfect", "best", "good", "nice", "fantastic"
]

def emoji_to_description(ch):
    if not ch:
        return ""
    desc = emoji.demojize(ch).strip(":").replace("_", " ")
    return desc

def has_negative_word(text):
    t = text.lower()
    return any(neg in t for neg in negative_keywords)

def has_positive_word(text):
    t = text.lower()
    return any(pos in t for pos in positive_keywords)

def build_input(text, emoji_char):
    """
    Enhanced preprocessing:
    1) Convert emoji to semantic description
    2) Detect emotion-text conflicts
    3) Add context tokens
    """
    text = str(text).strip()
    desc = emoji_to_description(emoji_char)
    token_prefixes = []

    if desc:
        token_prefixes.append(f"[EMOJI={desc}]")

    # Enhanced conflict detection
    if desc:
        # Positive emojis with negative text
        positive_emoji_cues = ["smile", "grin", "laugh", "heart", "joy", "relieved", "wink", "blush"]
        negative_emoji_cues = ["angry", "cry", "sad", "fear", "scared", "worried", "pouting"]

        is_positive_emoji = any(cue in desc for cue in positive_emoji_cues)
        is_negative_emoji = any(cue in desc for cue in negative_emoji_cues)

        if is_positive_emoji and has_negative_word(text):
            token_prefixes.append("[CONFLICT_POS_EMOJI_NEG_TEXT]")
        elif is_negative_emoji and has_positive_word(text):
            token_prefixes.append("[CONFLICT_NEG_EMOJI_POS_TEXT]")

        # Special handling for common emojis
        if "smiling" in desc or "grinning" in desc:
            token_prefixes.append("[SMILE_EMOJI]")
        elif "heart" in desc:
            token_prefixes.append("[HEART_EMOJI]")
        elif "crying" in desc or "tear" in desc:
            token_prefixes.append("[CRY_EMOJI]")
        elif "angry" in desc:
            token_prefixes.append("[ANGRY_EMOJI]")

    # Add length indicator for hidden emotion detection
    if len(text.split()) > 15:
        token_prefixes.append("[LONG_TEXT]")

    prefix = " ".join(token_prefixes)
    if prefix:
        return prefix + " " + text
    return text

# ============================================================
# 4) Enhanced Dataset with text augmentation for minority classes
# ============================================================
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
print(f"\nTokenizer vocab size: {tokenizer.vocab_size}")

class EmotionHiddenDataset(Dataset):
    def __init__(self, texts, emo_ids, hid_ids, emojis, augment=False):
        self.texts = list(texts)
        self.emo_ids = list(emo_ids)
        self.hid_ids = list(hid_ids)
        self.emojis = list(emojis)
        self.augment = augment
        self.class_distribution = Counter(emo_ids)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        raw_text = self.texts[idx]
        emoji_char = self.emojis[idx]
        emotion_id = self.emo_ids[idx]

        # Simple text augmentation for minority classes
        if self.augment and np.random.random() < 0.3:
            if emotion_id in [3, 4, 5]:  # fear, love, surprise (minority classes)
                # Add minor variations
                variations = [
                    f"I feel {raw_text}",
                    f"{raw_text} honestly",
                    f"To be honest, {raw_text}",
                    f"{raw_text} right now"
                ]
                raw_text = np.random.choice(variations)

        proc_text = build_input(raw_text, emoji_char)
        return proc_text, emotion_id, self.hid_ids[idx]

def collate_fn(batch):
    texts, emo_ids, hid_ids = zip(*batch)
    enc = tokenizer(
        list(texts),
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    enc["emotion_labels"] = torch.tensor(emo_ids, dtype=torch.long)
    enc["hidden_labels"] = torch.tensor(hid_ids, dtype=torch.float)
    return enc

# Create datasets
train_ds = EmotionHiddenDataset(X_train, y_train_em, y_train_hid, train_emojis, augment=True)
val_ds = EmotionHiddenDataset(X_val, y_val_em, y_val_hid, val_emojis, augment=False)

# ============================================================
# 5) Enhanced sampling strategy
# ============================================================
emo_counts = Counter(y_train_em)
print(f"\nTraining class counts: {dict(emo_counts)}")

# Option 1: Inverse frequency weighting
total_samples = len(y_train_em)
num_classes = len(emo_counts)
beta = 0.999  # Smoothing factor for effective number of samples

# Calculate effective number of samples
effective_num = 1.0 - np.power(beta, list(emo_counts.values()))
weights = (1.0 - beta) / np.array(effective_num)
weights = weights / np.sum(weights) * num_classes
class_weights = {i: float(w) for i, w in enumerate(weights)}

print("Class weights (effective num):", class_weights)

# Create sample weights
sample_weights = [class_weights[c] for c in y_train_em]
sampler = WeightedRandomSampler(
    sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

train_loader = DataLoader(
    train_ds,
    batch_size=32,
    sampler=sampler,
    collate_fn=collate_fn,
    num_workers=2 if device.type == "cuda" else 0
)
val_loader = DataLoader(
    val_ds,
    batch_size=64,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=2 if device.type == "cuda" else 0
)

# ============================================================
# 6) Enhanced Model Architecture
# ============================================================
class EnhancedEmotionHiddenModel(nn.Module):
    def __init__(self, base_name, num_emotions, dropout_p=0.3):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base_name)
        hidden_size = self.encoder.config.hidden_size

        # Emotion classification head (more complex)
        self.emotion_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 4, num_emotions),
        )

        # Hidden flag head (simpler)
        self.hidden_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 2, 1),
        )

        # Shared layers for better feature extraction
        self.shared_projection = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU(),
            nn.Dropout(dropout_p),
        )

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        for module in [self.emotion_head, self.hidden_head, self.shared_projection]:
            for layer in module:
                if isinstance(layer, nn.Linear):
                    nn.init.xavier_uniform_(layer.weight)
                    if layer.bias is not None:
                        nn.init.zeros_(layer.bias)

    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # Use mean pooling of all tokens (better than just CLS)
        hidden_states = outputs.last_hidden_state
        attention_mask_expanded = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        sum_embeddings = torch.sum(hidden_states * attention_mask_expanded, dim=1)
        sum_mask = torch.clamp(attention_mask_expanded.sum(dim=1), min=1e-9)
        pooled = sum_embeddings / sum_mask

        # Shared features
        shared_features = self.shared_projection(pooled)

        # Separate heads
        emo_logits = self.emotion_head(shared_features)
        hid_logits = self.hidden_head(shared_features).squeeze(-1)

        return emo_logits, hid_logits

num_emotions = len(le.classes_)
model = EnhancedEmotionHiddenModel(model_name, num_emotions, dropout_p=0.3).to(device)
print(f"\nModel parameters: {sum(p.numel() for p in model.parameters()):,}")

# ============================================================
# 7) Enhanced Loss Functions
# ============================================================
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, label_smoothing=0.1):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.label_smoothing = label_smoothing

    def forward(self, logits, targets):
        num_classes = logits.size(-1)

        # Apply label smoothing
        if self.label_smoothing > 0:
            with torch.no_grad():
                smooth_targets = torch.zeros_like(logits).scatter_(
                    1, targets.unsqueeze(1), 1.0
                )
                smooth_targets = smooth_targets * (1 - self.label_smoothing) + self.label_smoothing / num_classes

        # Calculate focal loss
        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        pt = torch.exp(-ce_loss)

        if self.label_smoothing > 0:
            # With label smoothing
            log_probs = F.log_softmax(logits, dim=-1)
            ce_loss = -(smooth_targets * log_probs).sum(dim=-1)

        focal_weight = (1 - pt) ** self.gamma

        if self.alpha is not None:
            alpha_weight = self.alpha[targets]
            focal_weight = focal_weight * alpha_weight

        loss = focal_weight * ce_loss
        return loss.mean()

class EnhancedMultitaskLoss(nn.Module):
    def __init__(self, class_weights_dict, gamma=2.0, hidden_weight=1.0):
        super().__init__()
        # Convert class weights to tensor
        alpha_tensor = torch.zeros(len(class_weights_dict))
        for idx, weight in class_weights_dict.items():
            alpha_tensor[idx] = weight

        # Normalize alpha
        alpha_tensor = alpha_tensor / alpha_tensor.sum() * len(alpha_tensor)

        self.emo_loss = FocalLoss(
            alpha=alpha_tensor.to(device),
            gamma=gamma,
            label_smoothing=0.1
        )
        self.hid_loss = nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor([2.0]).to(device)  # Adjust based on hidden flag ratio
        )
        self.hidden_weight = hidden_weight

    def forward(self, emo_logits, emo_targets, hid_logits, hid_targets):
        l_emo = self.emo_loss(emo_logits, emo_targets)
        l_hid = self.hid_loss(hid_logits, hid_targets)
        total_loss = l_emo + self.hidden_weight * l_hid
        return total_loss, l_emo, l_hid

# Create loss function with class weights
criterion = EnhancedMultitaskLoss(
    class_weights_dict=class_weights,
    gamma=1.5,  # Lower gamma for less aggressive focal loss
    hidden_weight=0.8  # Start with lower weight for hidden task
)

# ============================================================
# 8) Enhanced Optimizer and Scheduler
# ============================================================
# Freeze first few layers of encoder
for name, param in model.named_parameters():
    if "encoder.embeddings" in name or "encoder.encoder.layer.0" in name or "encoder.encoder.layer.1" in name:
        param.requires_grad = False

# Group parameters
encoder_params = []
head_params = []
for name, param in model.named_parameters():
    if param.requires_grad:
        if "encoder" in name:
            encoder_params.append(param)
        else:
            head_params.append(param)

print(f"\nTrainable encoder params: {len(encoder_params)}")
print(f"Trainable head params: {len(head_params)}")

optimizer = torch.optim.AdamW(
    [
        {"params": encoder_params, "lr": 2e-5, "weight_decay": 0.01},
        {"params": head_params, "lr": 5e-5, "weight_decay": 0.01},
    ],
    eps=1e-8,
    betas=(0.9, 0.999)
)

# Linear warmup + cosine annealing
num_epochs = 5
num_training_steps = num_epochs * len(train_loader)
num_warmup_steps = int(0.1 * num_training_steps)

scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

# ============================================================
# 9) Enhanced Evaluation with per-class metrics
# ============================================================
def evaluate_model(model, loader, criterion=None):
    model.eval()
    all_true_emo, all_pred_emo = [], []
    all_true_hid, all_pred_hid = [], []
    all_emo_probs = []

    total_loss = 0
    num_batches = 0

    with torch.no_grad():
        for batch in loader:
            batch = {
                k: v.to(device) if isinstance(v, torch.Tensor) else v
                for k, v in batch.items()
            }

            emo_logits, hid_logits = model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
            )

            if criterion is not None:
                loss, l_emo, l_hid = criterion(
                    emo_logits,
                    batch["emotion_labels"],
                    hid_logits,
                    batch["hidden_labels"],
                )
                total_loss += loss.item()
                num_batches += 1

            # Emotion predictions
            emo_probs = torch.softmax(emo_logits, dim=-1)
            emo_preds = emo_logits.argmax(dim=-1)

            # Hidden predictions
            hid_probs = torch.sigmoid(hid_logits)
            hid_preds = (hid_probs > 0.5).long()

            all_true_emo.extend(batch["emotion_labels"].cpu().numpy())
            all_pred_emo.extend(emo_preds.cpu().numpy())
            all_true_hid.extend(batch["hidden_labels"].cpu().numpy())
            all_pred_hid.extend(hid_preds.cpu().numpy())
            all_emo_probs.extend(emo_probs.cpu().numpy())

    # Calculate metrics
    print("=" * 60)
    print("EMOTION CLASSIFICATION REPORT")
    print("=" * 60)

    # Full classification report
    print(classification_report(
        all_true_emo,
        all_pred_emo,
        target_names=le.classes_,
        digits=3,
        zero_division=0
    ))

    # Per-class accuracy
    print("\nPER-CLASS ACCURACY:")
    cm = confusion_matrix(all_true_emo, all_pred_emo)
    for i, emotion in enumerate(le.classes_):
        total = cm[i].sum()
        correct = cm[i, i]
        acc = correct / total if total > 0 else 0
        print(f"{emotion:10s}: {acc:.3f} ({correct}/{total})")

    # Macro and weighted averages
    macro_acc = accuracy_score(all_true_emo, all_pred_emo)
    print(f"\nOverall Accuracy: {macro_acc:.3f}")

    # Hidden flag metrics
    print("\n" + "=" * 60)
    print("HIDDEN FLAG DETECTION")
    print("=" * 60)

    acc_hid = accuracy_score(all_true_hid, all_pred_hid)
    prec_hid, rec_hid, f1_hid, _ = precision_recall_fscore_support(
        all_true_hid,
        all_pred_hid,
        average="binary",
        pos_label=1,
        zero_division=0
    )

    print(f"Accuracy:  {acc_hid:.3f}")
    print(f"Precision: {prec_hid:.3f}")
    print(f"Recall:    {rec_hid:.3f}")
    print(f"F1-Score:  {f1_hid:.3f}")

    # Confusion matrix for hidden flag
    cm_hid = confusion_matrix(all_true_hid, all_pred_hid)
    print(f"\nConfusion Matrix (Hidden Flag):")
    print(cm_hid)

    if criterion is not None:
        avg_loss = total_loss / max(num_batches, 1)
        print(f"\nValidation Loss: {avg_loss:.4f}")

    return {
        "emo_accuracy": macro_acc,
        "hid_accuracy": acc_hid,
        "hid_f1": f1_hid,
        "emo_probs": all_emo_probs,
        "predictions": all_pred_emo
    }

# ============================================================
# 10) Enhanced Training Loop with early stopping
# ============================================================
def train_model():
    best_val_acc = 0
    patience = 3
    patience_counter = 0
    best_model_state = None

    print("\n" + "=" * 60)
    print("STARTING TRAINING")
    print("=" * 60)

    for epoch in range(num_epochs):
        print(f"\n{'='*40}")
        print(f"EPOCH {epoch + 1}/{num_epochs}")
        print(f"{'='*40}")

        # Training phase
        model.train()
        train_loss = 0
        emo_correct = 0
        hid_correct = 0
        total_samples = 0

        for batch_idx, batch in enumerate(train_loader):
            batch = {
                k: v.to(device) if isinstance(v, torch.Tensor) else v
                for k, v in batch.items()
            }

            optimizer.zero_grad()

            emo_logits, hid_logits = model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
            )

            loss, l_emo, l_hid = criterion(
                emo_logits,
                batch["emotion_labels"],
                hid_logits,
                batch["hidden_labels"],
            )

            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            scheduler.step()

            # Calculate batch accuracy
            emo_preds = emo_logits.argmax(dim=-1)
            hid_preds = (torch.sigmoid(hid_logits) > 0.5).long()

            emo_correct += (emo_preds == batch["emotion_labels"]).sum().item()
            hid_correct += (hid_preds == batch["hidden_labels"].long()).sum().item()
            total_samples += len(batch["emotion_labels"])
            train_loss += loss.item()

            if (batch_idx + 1) % 50 == 0:
                print(f"  Batch {batch_idx + 1}/{len(train_loader)} | "
                      f"Loss: {loss.item():.4f} | "
                      f"Emo Acc: {emo_correct/total_samples:.3f} | "
                      f"Hid Acc: {hid_correct/total_samples:.3f}")

        avg_train_loss = train_loss / len(train_loader)
        train_emo_acc = emo_correct / total_samples
        train_hid_acc = hid_correct / total_samples

        print(f"\nTraining Summary:")
        print(f"  Avg Loss: {avg_train_loss:.4f}")
        print(f"  Emotion Accuracy: {train_emo_acc:.3f}")
        print(f"  Hidden Accuracy: {train_hid_acc:.3f}")
        if mlflow.active_run():
            mlflow.log_metrics({"train_loss": avg_train_loss, "train_emo_accuracy": train_emo_acc, "train_hid_accuracy": train_hid_acc}, step=epoch + 1)

        # Validation phase
        print(f"\nValidation Results:")
        val_metrics = evaluate_model(model, val_loader, criterion)
        if mlflow.active_run():
            mlflow.log_metrics({"val_emo_accuracy": val_metrics["emo_accuracy"], "val_hid_accuracy": val_metrics["hid_accuracy"], "val_hid_f1": val_metrics["hid_f1"]}, step=epoch + 1)

        # Early stopping check
        current_val_acc = val_metrics["emo_accuracy"]

        if current_val_acc > best_val_acc:
            best_val_acc = current_val_acc
            patience_counter = 0
            best_model_state = model.state_dict().copy()
            torch.save(model.state_dict(), "best_emotion_model.pt")
            print(f"  ‚úì New best model saved! (Acc: {current_val_acc:.3f})")
        else:
            patience_counter += 1
            print(f"  ‚è≥ No improvement ({patience_counter}/{patience})")

        if patience_counter >= patience:
            print(f"\nEarly stopping triggered at epoch {epoch + 1}")
            break

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\nLoaded best model with validation accuracy: {best_val_acc:.3f}")

    # Final evaluation
    print("\n" + "=" * 60)
    print("FINAL EVALUATION ON BEST MODEL")
    print("=" * 60)
    final_metrics = evaluate_model(model, val_loader)

    return model, final_metrics

# ============================================================
# 11) Train the model (with MLflow)
# ============================================================
with mlflow.start_run():
    mlflow.log_params({
        "model_name": model_name,
        "num_epochs": num_epochs,
        "batch_size": 32,
        "lr_encoder": 2e-5,
        "lr_head": 5e-5,
        "data": CSV_PATH,
        "max_length": 128,
    })
    trained_model, metrics = train_model()
    mlflow.log_metrics({
        "final_val_emo_accuracy": metrics["emo_accuracy"],
        "final_val_hid_accuracy": metrics["hid_accuracy"],
        "final_val_hid_f1": metrics["hid_f1"],
    })
    log_pytorch_model(trained_model, artifact_path="model")
    log_label_encoder(le)
    log_training_config({"class_names": list(le.classes_)})

# Save locally as well
torch.save({
    'model_state_dict': trained_model.state_dict(),
    'label_encoder': le,
    'class_names': list(le.classes_),
    'tokenizer': tokenizer,
}, "final_emotion_hidden_model.pt")

print("\nModel saved as 'final_emotion_hidden_model.pt'")

# ============================================================
# 12) Prediction function
# ============================================================
def predict_emotion(text, emoji_char="", model=None, tokenizer=None, le=None):
    if model is None:
        # Load saved model
        checkpoint = torch.load("final_emotion_hidden_model.pt", map_location=device)
        model = EnhancedEmotionHiddenModel(model_name, len(checkpoint['class_names'])).to(device)
        model.load_state_dict(checkpoint['model_state_dict'])
        le = checkpoint['label_encoder']
        tokenizer = checkpoint['tokenizer']

    model.eval()

    # Preprocess
    proc_text = build_input(text, emoji_char)

    # Tokenize
    enc = tokenizer(
        proc_text,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        emo_logits, hid_logits = model(
            input_ids=enc["input_ids"],
            attention_mask=enc["attention_mask"],
        )

        emo_probs = torch.softmax(emo_logits, dim=-1)[0]
        hid_prob = torch.sigmoid(hid_logits)[0].item()

        emo_id = torch.argmax(emo_probs).item()
        emo_label = le.inverse_transform([emo_id])[0]
        emo_confidence = emo_probs[emo_id].item()

        # Get top-3 predictions
        top_probs, top_indices = torch.topk(emo_probs, 3)
        top_emotions = le.inverse_transform(top_indices.cpu().numpy())
        top_confidences = top_probs.cpu().numpy()

    result = {
        "predicted_emotion": emo_label,
        "emotion_confidence": emo_confidence,
        "hidden_probability": hid_prob,
        "is_hidden": hid_prob > 0.5,
        "top_predictions": [
            {"emotion": e, "confidence": float(c)}
            for e, c in zip(top_emotions, top_confidences)
        ],
        "processed_text": proc_text
    }

    return result




Using device: cuda
Label order: ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise']

Class distribution:
hidden_emotion_label
anger       1195
joy         1065
sadness     1032
fear         439
love         363
surprise     339
Name: count, dtype: int64

Total samples: 4433

Training samples: 3546
Validation samples: 887


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]


Tokenizer vocab size: 128000

Training class counts: {2: 852, 4: 826, 3: 290, 5: 271, 0: 956, 1: 351}
Class weights (effective num): {0: 0.6262003418440383, 1: 0.6387123742438989, 2: 1.4262803458788826, 3: 1.5125110761714946, 4: 0.5833502001023915, 5: 1.212945661759293}


pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]


Model parameters: 185,090,503

Trainable encoder params: 131
Trainable head params: 16

STARTING TRAINING

EPOCH 1/5


model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

  Batch 50/111 | Loss: 2.1392 | Emo Acc: 0.259 | Hid Acc: 0.466
  Batch 100/111 | Loss: 1.5656 | Emo Acc: 0.332 | Hid Acc: 0.481

Training Summary:
  Avg Loss: 2.1303
  Emotion Accuracy: 0.350
  Hidden Accuracy: 0.488

Validation Results:
EMOTION CLASSIFICATION REPORT
              precision    recall  f1-score   support

       anger      0.000     0.000     0.000       239
        fear      0.000     0.000     0.000        88
         joy      0.360     0.981     0.527       213
        love      0.631     0.726     0.675        73
     sadness      0.667     0.049     0.090       206
    surprise      0.271     0.824     0.407        68

    accuracy                          0.370       887
   macro avg      0.321     0.430     0.283       887
weighted avg      0.314     0.370     0.234       887


PER-CLASS ACCURACY:
anger     : 0.000 (0/239)
fear      : 0.000 (0/88)
joy       : 0.981 (209/213)
love      : 0.726 (53/73)
sadness   : 0.049 (10/206)
surprise  : 0.824 (56/68)

Overall 

In [2]:
drive_path = "model"  # folder (relative path in workspace)

import os, torch, pickle, json
os.makedirs(drive_path, exist_ok=True)



# 1) save tokenizer
tokenizer.save_pretrained(drive_path)

# 2) save label encoder + class names
with open(f"{drive_path}/label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

class_names = list(le.classes_)
with open(f"{drive_path}/class_names.json", "w") as f:
    json.dump(class_names, f)

# 3) save model weights
torch.save(trained_model.state_dict(), f"{drive_path}/model_state.pt")


NameError: name 'tokenizer' is not defined

In [None]:
import torch, pickle, json
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
import torch.nn.functional as F
import emoji

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
drive_path = "model"  # relative path in workspace
model_name = "microsoft/deberta-v3-base"

# tokenizer + labels
tokenizer = AutoTokenizer.from_pretrained(drive_path)
with open(f"{drive_path}/label_encoder.pkl", "rb") as f:
    le = pickle.load(f)
with open(f"{drive_path}/class_names.json", "r") as f:
    class_names = json.load(f)

# SAME build_input as in training
negative_keywords = [
    "hate", "angry", "mad", "furious", "sad", "depressed", "terrible",
    "cry", "crying", "die", "dead", "kill", "killing", "awful",
    "annoying", "stupid", "idiot", "worst", "bad", "horrible"
]

positive_keywords = [
    "love", "happy", "joy", "great", "wonderful", "amazing",
    "excellent", "perfect", "best", "good", "nice", "fantastic"
]

def emoji_to_description(ch):
    if not ch:
        return ""
    desc = emoji.demojize(ch).strip(":").replace("_", " ")
    return desc

def has_negative_word(text):
    t = text.lower()
    return any(neg in t for neg in negative_keywords)

def has_positive_word(text):
    t = text.lower()
    return any(pos in t for pos in positive_keywords)

def build_input(text, emoji_char):
    """
    Enhanced preprocessing:
    1) Convert emoji to semantic description
    2) Detect emotion-text conflicts
    3) Add context tokens
    """
    text = str(text).strip()
    desc = emoji_to_description(emoji_char)
    token_prefixes = []

    if desc:
        token_prefixes.append(f"[EMOJI={desc}]")

    # Enhanced conflict detection
    if desc:
        # Positive emojis with negative text
        positive_emoji_cues = ["smile", "grin", "laugh", "heart", "joy", "relieved", "wink", "blush"]
        negative_emoji_cues = ["angry", "cry", "sad", "fear", "scared", "worried", "pouting"]

        is_positive_emoji = any(cue in desc for cue in positive_emoji_cues)
        is_negative_emoji = any(cue in desc for cue in negative_emoji_cues)

        if is_positive_emoji and has_negative_word(text):
            token_prefixes.append("[CONFLICT_POS_EMOJI_NEG_TEXT]")
        elif is_negative_emoji and has_positive_word(text):
            token_prefixes.append("[CONFLICT_NEG_EMOJI_POS_TEXT]")

        # Special handling for common emojis
        if "smiling" in desc or "grinning" in desc:
            token_prefixes.append("[SMILE_EMOJI]")
        elif "heart" in desc:
            token_prefixes.append("[HEART_EMOJI]")
        elif "crying" in desc or "tear" in desc:
            token_prefixes.append("[CRY_EMOJI]")
        elif "angry" in desc:
            token_prefixes.append("[ANGRY_EMOJI]")

    # Add length indicator for hidden emotion detection
    if len(text.split()) > 15:
        token_prefixes.append("[LONG_TEXT]")

    prefix = " ".join(token_prefixes)
    if prefix:
        return prefix + " " + text
    return text

class EnhancedEmotionHiddenModel(nn.Module):
    def __init__(self, base_name, num_emotions, dropout_p=0.3):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base_name)
        hidden_size = self.encoder.config.hidden_size
        self.emotion_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 4, num_emotions),
        )
        self.hidden_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout_p),
            nn.Linear(hidden_size // 2, 1),
        )
        self.shared_projection = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.GELU(),
            nn.Dropout(dropout_p),
        )
    def forward(self, input_ids, attention_mask):
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        hs = outputs.last_hidden_state
        mask_exp = attention_mask.unsqueeze(-1).expand(hs.size()).float()
        summed = (hs * mask_exp).sum(dim=1)
        summed_mask = torch.clamp(mask_exp.sum(dim=1), min=1e-9)
        pooled = summed / summed_mask
        shared = self.shared_projection(pooled)
        emo_logits = self.emotion_head(shared)
        hid_logits = self.hidden_head(shared).squeeze(-1)
        return emo_logits, hid_logits

num_emotions = len(class_names)
model = EnhancedEmotionHiddenModel(model_name, num_emotions, dropout_p=0.3).to(device)

state_dict = torch.load(f"{drive_path}/model_state.pt", map_location=device)
model.load_state_dict(state_dict)
model.eval()

print(f"Model loaded successfully from '{drive_path}'")
print(f"Classes: {class_names}")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


FileNotFoundError: [Errno 2] No such file or directory: '/model/label_encoder.pkl'

In [None]:
emotion_id2label = {i: lab for i, lab in enumerate(class_names)}

def predict_emotion(text, emoji_char=""):
    model.eval()
    proc_text = build_input(text, emoji_char)
    enc = tokenizer(
        proc_text,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        emo_logits, hid_logits = model(
            input_ids=enc["input_ids"],
            attention_mask=enc["attention_mask"],
        )
        emo_probs = torch.softmax(emo_logits, dim=-1)[0]
        hid_prob = torch.sigmoid(hid_logits)[0].item()

    emo_id = int(torch.argmax(emo_probs).item())
    return {
        "emotion": emotion_id2label[emo_id],
        "emotion_conf": float(emo_probs[emo_id].item()),
        "hidden_flag": bool(hid_prob > 0.5),
        "hidden_prob": hid_prob,
    }


In [None]:
test_examples = [
    ("I‚Äôm not angry üò°, just frustrated", "üò°"),
    ("Not angry üò°, just annoyed", "üò°"),
    ("I am angry üò°", "üò°"),
("This makes me angry üò†", "üò†"),
("I‚Äôm really mad about this üò°", "üò°"),
("I‚Äôm upset and angry üò†", "üò†"),
    ("I understand your decision, happy to move forward", "üôÇüò°"),
        ("I dont love you ‚òπÔ∏èüòí", "‚òπÔ∏èüòí")



]


for text, emo in test_examples:
    out = predict_emotion(text, emoji_char=emo)
    print("text:", text)
    print("emoji:", emo)
    print("pred:", out)
    print("----")


text: I‚Äôm not angry üò°, just frustrated
emoji: üò°
pred: {'emotion': 'anger', 'emotion_conf': 0.4897781014442444, 'hidden_flag': False, 'hidden_prob': 0.08016029745340347}
----
text: Not angry üò°, just annoyed
emoji: üò°
pred: {'emotion': 'anger', 'emotion_conf': 0.4608343243598938, 'hidden_flag': False, 'hidden_prob': 0.05819237604737282}
----
text: I am angry üò°
emoji: üò°
pred: {'emotion': 'anger', 'emotion_conf': 0.3995002806186676, 'hidden_flag': False, 'hidden_prob': 0.05505933612585068}
----
text: This makes me angry üò†
emoji: üò†
pred: {'emotion': 'joy', 'emotion_conf': 0.46605268120765686, 'hidden_flag': False, 'hidden_prob': 0.08976083993911743}
----
text: I‚Äôm really mad about this üò°
emoji: üò°
pred: {'emotion': 'anger', 'emotion_conf': 0.43273335695266724, 'hidden_flag': False, 'hidden_prob': 0.07402713596820831}
----
text: I‚Äôm upset and angry üò†
emoji: üò†
pred: {'emotion': 'anger', 'emotion_conf': 0.35993197560310364, 'hidden_flag': False, 'hidden_