In [None]:
import os, pickle, random
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, f1_score, accuracy_score
import matplotlib.pyplot as plt
from scipy.stats import mode
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification, get_cosine_schedule_with_warmup
import torch
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import GradScaler, autocast

# -------------------- CONFIG --------------------
# Reproducibility: consumed by set_seed() and by the StratifiedKFold split.
SEED = 42

# Batch sizes for the training and validation DataLoaders respectively.
BATCH_SIZE = 16
VAL_BATCH_SIZE = 32

# Upper bound on tokens per example; longer inputs are truncated in collate_fn.
MAX_LEN = 128

# Optimisation schedule: maximum epochs, early-stopping patience (epochs
# without a validation-F1 improvement), and the AdamW learning rate.
EPOCHS = 8
PATIENCE = 3
LR = 2e-5

# Pretrained checkpoint used for both the tokenizer and the classifier.
MODEL_NAME = "microsoft/deberta-v3-small"

# Root folder holding the input CSVs and receiving the per-run outputs.
BASE_DIR = "/content/drive/MyDrive/FIRE"

# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# -------------------- UTILS --------------------
def set_seed(seed=SEED):
    """Seed every random number generator this script relies on.

    Applies ``seed`` to Python's ``random`` module, NumPy's global RNG,
    PyTorch's CPU generator and the generators of all CUDA devices.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Seed once at import time so everything below is reproducible.
set_seed()

# -------------------- DATASET --------------------
class CryptoDataset(Dataset):
    """Map-style dataset that yields raw ``{"text", "label"}`` samples.

    Tokenization is not done here; samples are returned untokenized so a
    batch-level collate function can pad each batch dynamically.

    Args:
        texts: Sequence of input strings (list, array, or pandas Series).
        labels: Sequence of integer class ids, aligned with ``texts``.
        tokenizer: Stored on the instance for callers that want it; it is
            not used by ``__getitem__``.
        max_len: Stored on the instance; not used by ``__getitem__``.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        # Materialise as plain lists so that __getitem__ indexes by
        # *position*. A pandas Series indexed with `series[idx]` looks up by
        # label, which raises KeyError (or silently returns the wrong row)
        # whenever the Series does not carry a fresh 0..n-1 index.
        self.texts = list(texts)
        self.labels = list(labels)
        if len(self.texts) != len(self.labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(self.texts)} and {len(self.labels)}"
            )
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
        }

def collate_fn(batch):
    """Turn a list of ``{"text", "label"}`` samples into model-ready tensors.

    The texts are tokenized together with the module-level ``tokenizer``,
    padded to the longest sequence in the batch and truncated to ``MAX_LEN``
    tokens. Returns a dict with ``input_ids``, ``attention_mask`` and
    ``labels`` (a ``torch.long`` tensor).
    """
    raw_texts = []
    raw_labels = []
    for sample in batch:
        raw_texts.append(sample["text"])
        raw_labels.append(sample["label"])

    label_tensor = torch.tensor(raw_labels, dtype=torch.long)
    tokens = tokenizer(
        raw_texts,
        padding=True,
        truncation=True,
        max_length=MAX_LEN,
        return_tensors="pt",
    )

    collated = {}
    collated["input_ids"] = tokens["input_ids"]
    collated["attention_mask"] = tokens["attention_mask"]
    collated["labels"] = label_tensor
    return collated

# -------------------- LOSSES --------------------
class FocalLoss(torch.nn.Module):
    """Focal loss on top of (optionally label-smoothed) cross-entropy.

    Per-sample loss is ``alpha[target] * (1 - pt) ** gamma * ce`` where
    ``ce`` is the per-sample cross-entropy and ``pt = exp(-ce)``; the batch
    mean is returned.

    Args:
        alpha: Optional per-class weights (list, numpy array, or tensor)
            indexed by class id. ``None`` or an empty sequence disables
            class weighting.
        gamma: Focusing exponent applied to ``(1 - pt)``.
        label_smoothing: Smoothing factor forwarded to ``cross_entropy``.
    """

    def __init__(self, alpha=None, gamma=2.0, label_smoothing=0.1):
        super().__init__()
        # `if alpha` is ambiguous for numpy arrays / tensors, so test for
        # None explicitly. An empty sequence still means "no weighting".
        weights = None
        if alpha is not None:
            weights = torch.as_tensor(alpha, dtype=torch.float32).detach().clone()
            if weights.numel() == 0:
                weights = None
        # A buffer follows the module on `.to(...)` and does not tie the loss
        # to any particular global device.
        self.register_buffer("alpha", weights)
        self.gamma = gamma
        self.smoothing = label_smoothing

    def forward(self, logits, target):
        ce = torch.nn.functional.cross_entropy(
            logits, target, reduction='none', label_smoothing=self.smoothing
        )
        # With label smoothing > 0, `ce` is the smoothed loss, so `pt` is
        # exp(-smoothed_ce) rather than the exact target-class probability.
        pt = torch.exp(-ce)
        focal = (1 - pt) ** self.gamma * ce
        if self.alpha is not None:
            if self.alpha.device != logits.device:
                # Move once and keep the weights on the device in use.
                self.alpha = self.alpha.to(logits.device)
            focal = self.alpha[target] * focal
        return focal.mean()

def supervised_contrastive_loss(embeddings, labels, temperature=0.3):
    """Supervised contrastive loss over one batch of embeddings.

    Every sample acts as an anchor; other samples sharing its label are its
    positives, and all other samples in the batch form the denominator. The
    per-anchor mean log-probability of the positives is negated and averaged
    over all anchors (an anchor with no positive contributes 0).

    Args:
        embeddings: 2-D tensor, one row per sample.
        labels: Tensor with one class id per sample.
        temperature: Positive scaling applied to the cosine similarities.

    Returns:
        Scalar tensor on the same device as ``embeddings``.

    Raises:
        ValueError: If ``temperature`` is not positive or the number of
            labels differs from the number of embeddings.
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")
    batch_size = embeddings.size(0)
    if labels.numel() != batch_size:
        raise ValueError(
            f"expected {batch_size} labels, got {labels.numel()}"
        )

    # Build every helper tensor on the embeddings' own device instead of a
    # module-level global, so the loss works wherever its inputs live.
    dev = embeddings.device

    norm = torch.nn.functional.normalize(embeddings, dim=1)
    sim = torch.matmul(norm, norm.T) / temperature

    labels = labels.to(dev).view(-1, 1)
    # 1 everywhere except the diagonal: an anchor is never its own positive
    # and is excluded from its own denominator.
    not_self = 1 - torch.eye(batch_size, device=dev)
    positives = torch.eq(labels, labels.T).float() * not_self

    exp_logits = torch.exp(sim) * not_self
    log_prob = sim - torch.log(exp_logits.sum(1, keepdim=True) + 1e-12)
    # The epsilon keeps anchors without positives at 0 instead of 0/0.
    return -(positives * log_prob).sum(1).div(positives.sum(1) + 1e-12).mean()

# -------------------- TRAIN FUNCTION --------------------
def _collect_predictions(model, loader, amp_enabled):
    """Run ``model`` over ``loader`` without gradients; return (preds, targets) lists."""
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            # Labels are withheld so the model does not compute its own loss.
            inputs = {k: v for k, v in batch.items() if k != "labels"}
            with autocast(enabled=amp_enabled):
                logits = model(**inputs).logits
            preds += logits.argmax(dim=-1).cpu().tolist()
            targets += batch["labels"].cpu().tolist()
    return preds, targets


def train_level3_model(
    train_loader, val_loader, save_path, y_train_labels,
    num_labels=4, contrastive_weight=0.3, level_name="level3_fold", use_amp=True
):
    """Fine-tune a DeBERTa classifier with focal + supervised-contrastive loss.

    Trains for up to ``EPOCHS`` epochs, evaluates on ``val_loader`` after each
    one, checkpoints the weights whenever the weighted validation F1 improves
    and stops early after ``PATIENCE`` epochs without improvement. A text log
    (``*_log.txt``) and a metric plot (``*_plot.png``) are written next to
    ``save_path``.

    Args:
        train_loader: DataLoader yielding dicts of tensors that include a
            ``"labels"`` entry; every other entry is forwarded to the model.
        val_loader: DataLoader of the same form used for model selection.
        save_path: ``.pth`` destination for the best ``state_dict``.
        y_train_labels: Integer class ids of the training set, used to derive
            the focal-loss class weights.
        num_labels: Number of output classes.
        contrastive_weight: Multiplier of the contrastive term.
        level_name: Label used in the progress bar and the plot title.
        use_amp: Request mixed precision; only honoured when running on CUDA.

    Returns:
        The model in eval mode, carrying the best checkpoint's weights when at
        least one checkpoint was written.
    """
    # CUDA autocast / GradScaler are meaningless on CPU, so only enable there.
    amp_enabled = bool(use_amp) and device.type == "cuda"

    model = DebertaV2ForSequenceClassification.from_pretrained(
        MODEL_NAME, num_labels=num_labels, output_hidden_states=True
    ).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    total_steps = len(train_loader) * EPOCHS
    # The scheduler expects an integer number of warmup steps.
    scheduler = get_cosine_schedule_with_warmup(optimizer, int(0.1 * total_steps), total_steps)
    scaler = GradScaler(enabled=amp_enabled)

    # Inverse-log-frequency class weights, normalised to sum to 1.
    # `minlength` guarantees one weight per class even if the highest class
    # ids are missing from this split; absent classes get weight 0 so they do
    # not distort the normalisation.
    class_counts = np.bincount(np.asarray(y_train_labels), minlength=num_labels)
    present = class_counts > 0
    class_weights = np.zeros(len(class_counts), dtype=np.float64)
    class_weights[present] = 1.0 / np.log(1.01 + class_counts[present])
    class_weights = class_weights / class_weights.sum()
    focal_loss = FocalLoss(alpha=class_weights.tolist(), gamma=2.0, label_smoothing=0.1)

    best_f1, patience_counter = -1, 0
    checkpoint_saved = False
    train_loss_hist, val_f1_hist, train_f1_hist, train_acc_hist, val_acc_hist = [], [], [], [], []
    log_file_path = save_path.replace(".pth", "_log.txt")
    with open(log_file_path, "w") as log_file:
        for epoch in range(EPOCHS):
            model.train()
            total_loss = 0.0
            preds, targets = [], []
            for batch in tqdm(train_loader, desc=f"[{level_name}] Epoch {epoch+1}"):
                batch = {k: v.to(device) for k, v in batch.items()}
                # Labels are withheld so the model skips its built-in loss.
                inputs = {k: v for k, v in batch.items() if k != "labels"}
                optimizer.zero_grad()
                with autocast(enabled=amp_enabled):
                    out = model(**inputs)
                    logits = out.logits
                    # First-token embedding of the last hidden layer.
                    cls_emb = out.hidden_states[-1][:, 0]
                    loss = focal_loss(logits, batch["labels"])
                    # The contrastive term is only added for batches that
                    # contain more than one class.
                    if batch["labels"].unique().numel() > 1:
                        loss = loss + contrastive_weight * supervised_contrastive_loss(cls_emb, batch["labels"])
                scaler.scale(loss).backward()
                # Gradients must be unscaled before clipping, otherwise the
                # 1.0 threshold is compared against loss-scaled magnitudes.
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scaler.step(optimizer)
                scaler.update()
                scheduler.step()
                total_loss += loss.item()
                preds += logits.argmax(dim=-1).cpu().tolist()
                targets += batch["labels"].cpu().tolist()

            # Mean loss per batch: comparable across dataset sizes and on a
            # scale that can share a plot with accuracy / F1.
            epoch_loss = total_loss / max(1, len(train_loader))
            train_loss_hist.append(epoch_loss)
            train_acc = accuracy_score(targets, preds)
            train_f1 = f1_score(targets, preds, average="weighted")
            train_f1_hist.append(train_f1)
            train_acc_hist.append(train_acc)

            val_preds, val_targets = _collect_predictions(model, val_loader, amp_enabled)
            val_f1 = f1_score(val_targets, val_preds, average="weighted")
            val_acc = accuracy_score(val_targets, val_preds)
            val_f1_hist.append(val_f1)
            val_acc_hist.append(val_acc)

            line = f" Epoch {epoch+1}: Train Loss = {epoch_loss:.4f} | Train Acc = {train_acc:.4f} | Train F1 = {train_f1:.4f} | Val Acc = {val_acc:.4f} | Val F1 = {val_f1:.4f}"
            print(line)
            log_file.write(line + "\n")
            if val_f1 > best_f1:
                best_f1 = val_f1
                patience_counter = 0
                torch.save(model.state_dict(), save_path)
                checkpoint_saved = True
                print(f" Model saved at: {save_path}")
                log_file.write(f"Saved model: {save_path}\n")
            else:
                patience_counter += 1
                if patience_counter >= PATIENCE:
                    print(" Early stopping")
                    log_file.write("Early stopping\n")
                    break

    # Hand back the best epoch's weights rather than whatever the final epoch
    # happened to produce.
    if checkpoint_saved:
        model.load_state_dict(torch.load(save_path, map_location=device))
    model.eval()

    # Plot & log
    plot_path = save_path.replace(".pth", "_plot.png")
    plt.figure(figsize=(10, 6))
    plt.plot(train_loss_hist, label="Train Loss")
    plt.plot(train_f1_hist, label="Train F1")
    plt.plot(train_acc_hist, label="Train Acc")
    plt.plot(val_f1_hist, label="Val F1")
    plt.plot(val_acc_hist, label="Val Acc")
    plt.title(f"{level_name} Training")
    plt.xlabel("Epoch")
    plt.legend()
    plt.grid(True)
    plt.savefig(plot_path)
    plt.close()
    return model

# -------------------- MAIN SCRIPT --------------------
def _predict_labels(model, loader):
    """Run ``model`` over ``loader`` without gradients; return (preds, targets) lists."""
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            preds += logits.argmax(dim=-1).cpu().tolist()
            targets += batch["labels"].cpu().tolist()
    return preds, targets


if __name__ == "__main__":
    # Every run writes into its own timestamped folder under BASE_DIR.
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = os.path.join(BASE_DIR, f"run_{run_id}")
    os.makedirs(run_dir, exist_ok=True)
    for sub in ["models", "logs", "plots", "encoders"]:
        os.makedirs(os.path.join(run_dir, sub), exist_ok=True)

    train_df = pd.read_csv(f"{BASE_DIR}/crypto_task1_train.csv")
    val_df = pd.read_csv(f"{BASE_DIR}/crypto_task1_val.csv")

    print(" Unique values in level_1 (train):", train_df["level_1"].unique())
    print(" Unique values in level_2 (train):", train_df["level_2"].unique())

    # Level 3 is only trained on rows with level_1 == 2 and level_2 == 0.
    train_l3_df = train_df[(train_df["level_1"] == 2) & (train_df["level_2"] == 0)].copy().reset_index(drop=True)
    val_l3_df = val_df[(val_df["level_1"] == 2) & (val_df["level_2"] == 0)].copy().reset_index(drop=True)

    if train_l3_df.empty:
        print(" No NEUTRAL samples found in level_2 under SUBJECTIVE. Skipping Level 3 training.")
    else:
        le3 = LabelEncoder()
        train_l3_df["level_3_enc"] = le3.fit_transform(train_l3_df["level_3"])

        # LabelEncoder.transform raises on labels it has never seen, so keep
        # only hold-out rows whose level_3 value occurred in training.
        known = val_l3_df["level_3"].isin(le3.classes_)
        if not known.all():
            print(f" Dropping {int((~known).sum())} validation rows with level_3 labels unseen in training.")
        val_l3_df = val_l3_df[known].copy().reset_index(drop=True)
        val_l3_df["level_3_enc"] = le3.transform(val_l3_df["level_3"])

        with open(f"{run_dir}/encoders/label_encoder_level_3.pkl", "wb") as enc_file:
            pickle.dump(le3, enc_file)

        # Must stay a module-level name: collate_fn reads the global `tokenizer`.
        tokenizer = DebertaV2Tokenizer.from_pretrained(MODEL_NAME)
        num_classes = len(le3.classes_)

        # Fixed hold-out set scored by every fold model; this is what makes a
        # per-sample majority vote across folds well defined.
        holdout_loader = None
        if not val_l3_df.empty:
            holdout_loader = DataLoader(
                CryptoDataset(val_l3_df["text"], val_l3_df["level_3_enc"], tokenizer),
                batch_size=VAL_BATCH_SIZE, shuffle=False, collate_fn=collate_fn,
            )

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
        oof_preds, oof_labels = [], []
        holdout_fold_preds, holdout_labels = [], []

        for fold, (tr_idx, va_idx) in enumerate(skf.split(train_l3_df, train_l3_df["level_3_enc"])):
            print(f"\n Fold {fold+1}/5")
            tr_df = train_l3_df.loc[tr_idx].reset_index(drop=True)
            va_df = train_l3_df.loc[va_idx].reset_index(drop=True)
            tr_loader = DataLoader(CryptoDataset(tr_df["text"], tr_df["level_3_enc"], tokenizer), batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
            va_loader = DataLoader(CryptoDataset(va_df["text"], va_df["level_3_enc"], tokenizer), batch_size=VAL_BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
            save_path = os.path.join(run_dir, "models", f"level3_fold{fold+1}.pth")

            model = train_level3_model(
                tr_loader, va_loader, save_path,
                y_train_labels=tr_df["level_3_enc"].values,
                num_labels=num_classes,
                level_name=f"level3_fold{fold+1}"
            )

            # Score with the best checkpoint of this fold, not whatever
            # weights the final epoch left in memory.
            if os.path.exists(save_path):
                model.load_state_dict(torch.load(save_path, map_location=device))

            # Each fold validates on a different slice of the training data,
            # so these predictions are pooled (out-of-fold), never stacked.
            fold_preds, fold_targets = _predict_labels(model, va_loader)
            oof_preds.extend(fold_preds)
            oof_labels.extend(fold_targets)

            if holdout_loader is not None:
                preds, holdout_labels = _predict_labels(model, holdout_loader)
                holdout_fold_preds.append(preds)

            # Free the fold model before the next one is created.
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        oof_acc = accuracy_score(oof_labels, oof_preds)
        oof_f1 = f1_score(oof_labels, oof_preds, average="weighted")
        print(f"\n Level 3 Out-of-fold Accuracy: {oof_acc:.4f} | F1: {oof_f1:.4f}")

        if holdout_fold_preds:
            # Rows = fold models, columns = hold-out samples (same for all).
            pred_matrix = np.array(holdout_fold_preds)
            # np.ravel copes with both SciPy return shapes: (1, n) in older
            # releases and (n,) in newer ones.
            majority = np.ravel(mode(pred_matrix, axis=0).mode)
            true_labels = np.array(holdout_labels)
            acc = accuracy_score(true_labels, majority)
            f1 = f1_score(true_labels, majority, average="weighted")
            print(f"\n Level 3 Ensemble Accuracy: {acc:.4f} | F1: {f1:.4f}")
        else:
            print(" No Level 3 rows in the validation CSV. Skipping ensemble evaluation.")


🔍 Unique values in level_1 (train): [2 0 1]
🔍 Unique values in level_2 (train): [ 2.  0. nan  1.]

🔁 Fold 1/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler(enabled=use_amp)
  with autocast(enabled=use_amp):
[level3_fold1] Epoch 1: 100%|██████████| 293/293 [00:51<00:00,  5.72it/s]
  with autocast(enabled=use_amp):


📊 Epoch 1: Train Loss = 270.5017 | Train Acc = 0.4150 | Train F1 = 0.3758 | Val Acc = 0.5444 | Val F1 = 0.3839
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 2: 100%|██████████| 293/293 [00:43<00:00,  6.77it/s]
  with autocast(enabled=use_amp):


📊 Epoch 2: Train Loss = 265.6484 | Train Acc = 0.5640 | Train F1 = 0.4558 | Val Acc = 0.7222 | Val F1 = 0.6716
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 3: 100%|██████████| 293/293 [00:49<00:00,  5.91it/s]
  with autocast(enabled=use_amp):


📊 Epoch 3: Train Loss = 240.0111 | Train Acc = 0.7496 | Train F1 = 0.6986 | Val Acc = 0.7803 | Val F1 = 0.7306
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 4: 100%|██████████| 293/293 [00:45<00:00,  6.43it/s]
  with autocast(enabled=use_amp):


📊 Epoch 4: Train Loss = 224.9056 | Train Acc = 0.7830 | Train F1 = 0.7322 | Val Acc = 0.7889 | Val F1 = 0.7372
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 5: 100%|██████████| 293/293 [00:58<00:00,  4.98it/s]
  with autocast(enabled=use_amp):


📊 Epoch 5: Train Loss = 219.5839 | Train Acc = 0.7971 | Train F1 = 0.7506 | Val Acc = 0.7974 | Val F1 = 0.7447
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 6: 100%|██████████| 293/293 [00:46<00:00,  6.37it/s]
  with autocast(enabled=use_amp):


📊 Epoch 6: Train Loss = 216.8281 | Train Acc = 0.8110 | Train F1 = 0.7728 | Val Acc = 0.7983 | Val F1 = 0.7557
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 7: 100%|██████████| 293/293 [00:47<00:00,  6.11it/s]
  with autocast(enabled=use_amp):


📊 Epoch 7: Train Loss = 213.0904 | Train Acc = 0.8238 | Train F1 = 0.7929 | Val Acc = 0.8043 | Val F1 = 0.7640
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth


  with autocast(enabled=use_amp):
[level3_fold1] Epoch 8: 100%|██████████| 293/293 [00:46<00:00,  6.30it/s]
  with autocast(enabled=use_amp):


📊 Epoch 8: Train Loss = 214.5947 | Train Acc = 0.8249 | Train F1 = 0.7943 | Val Acc = 0.8060 | Val F1 = 0.7708
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold1.pth

🔁 Fold 2/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler(enabled=use_amp)
  with autocast(enabled=use_amp):
[level3_fold2] Epoch 1: 100%|██████████| 293/293 [00:55<00:00,  5.24it/s]
  with autocast(enabled=use_amp):


📊 Epoch 1: Train Loss = 271.9362 | Train Acc = 0.5020 | Train F1 = 0.3799 | Val Acc = 0.5436 | Val F1 = 0.3829
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold2.pth


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 2: 100%|██████████| 293/293 [00:46<00:00,  6.25it/s]
  with autocast(enabled=use_amp):


📊 Epoch 2: Train Loss = 267.1633 | Train Acc = 0.5407 | Train F1 = 0.3902 | Val Acc = 0.5436 | Val F1 = 0.3829


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 3: 100%|██████████| 293/293 [00:45<00:00,  6.42it/s]
  with autocast(enabled=use_amp):


📊 Epoch 3: Train Loss = 252.0105 | Train Acc = 0.6588 | Train F1 = 0.5958 | Val Acc = 0.7615 | Val F1 = 0.7095
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold2.pth


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 4: 100%|██████████| 293/293 [00:45<00:00,  6.42it/s]
  with autocast(enabled=use_amp):


📊 Epoch 4: Train Loss = 227.9157 | Train Acc = 0.7734 | Train F1 = 0.7231 | Val Acc = 0.7812 | Val F1 = 0.7298
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold2.pth


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 5: 100%|██████████| 293/293 [00:45<00:00,  6.42it/s]
  with autocast(enabled=use_amp):


📊 Epoch 5: Train Loss = 222.3213 | Train Acc = 0.7875 | Train F1 = 0.7431 | Val Acc = 0.7923 | Val F1 = 0.7410
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold2.pth


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 6: 100%|██████████| 293/293 [00:45<00:00,  6.47it/s]
  with autocast(enabled=use_amp):


📊 Epoch 6: Train Loss = 219.5630 | Train Acc = 0.8014 | Train F1 = 0.7670 | Val Acc = 0.7932 | Val F1 = 0.7442
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold2.pth


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 7: 100%|██████████| 293/293 [00:46<00:00,  6.28it/s]
  with autocast(enabled=use_amp):


📊 Epoch 7: Train Loss = 217.5133 | Train Acc = 0.8097 | Train F1 = 0.7791 | Val Acc = 0.7863 | Val F1 = 0.7429


  with autocast(enabled=use_amp):
[level3_fold2] Epoch 8: 100%|██████████| 293/293 [00:42<00:00,  6.86it/s]
  with autocast(enabled=use_amp):


📊 Epoch 8: Train Loss = 214.3938 | Train Acc = 0.8123 | Train F1 = 0.7866 | Val Acc = 0.7863 | Val F1 = 0.7428

🔁 Fold 3/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler(enabled=use_amp)
  with autocast(enabled=use_amp):
[level3_fold3] Epoch 1: 100%|██████████| 293/293 [00:42<00:00,  6.84it/s]
  with autocast(enabled=use_amp):


📊 Epoch 1: Train Loss = 274.1547 | Train Acc = 0.3957 | Train F1 = 0.3540 | Val Acc = 0.5441 | Val F1 = 0.3834
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 2: 100%|██████████| 293/293 [00:43<00:00,  6.74it/s]
  with autocast(enabled=use_amp):


📊 Epoch 2: Train Loss = 267.1848 | Train Acc = 0.5481 | Train F1 = 0.3996 | Val Acc = 0.6039 | Val F1 = 0.5083
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 3: 100%|██████████| 293/293 [00:45<00:00,  6.40it/s]
  with autocast(enabled=use_amp):


📊 Epoch 3: Train Loss = 240.6250 | Train Acc = 0.7435 | Train F1 = 0.6926 | Val Acc = 0.7725 | Val F1 = 0.7215
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 4: 100%|██████████| 293/293 [00:45<00:00,  6.39it/s]
  with autocast(enabled=use_amp):


📊 Epoch 4: Train Loss = 226.7806 | Train Acc = 0.7822 | Train F1 = 0.7309 | Val Acc = 0.7810 | Val F1 = 0.7295
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 5: 100%|██████████| 293/293 [00:45<00:00,  6.41it/s]
  with autocast(enabled=use_amp):


📊 Epoch 5: Train Loss = 223.6059 | Train Acc = 0.7903 | Train F1 = 0.7380 | Val Acc = 0.7870 | Val F1 = 0.7337
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 6: 100%|██████████| 293/293 [00:46<00:00,  6.34it/s]
  with autocast(enabled=use_amp):


📊 Epoch 6: Train Loss = 219.9893 | Train Acc = 0.7963 | Train F1 = 0.7479 | Val Acc = 0.7930 | Val F1 = 0.7406
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 7: 100%|██████████| 293/293 [00:45<00:00,  6.43it/s]
  with autocast(enabled=use_amp):


📊 Epoch 7: Train Loss = 216.3455 | Train Acc = 0.8063 | Train F1 = 0.7587 | Val Acc = 0.7947 | Val F1 = 0.7428
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold3.pth


  with autocast(enabled=use_amp):
[level3_fold3] Epoch 8: 100%|██████████| 293/293 [00:45<00:00,  6.47it/s]
  with autocast(enabled=use_amp):


📊 Epoch 8: Train Loss = 215.3091 | Train Acc = 0.8087 | Train F1 = 0.7635 | Val Acc = 0.7930 | Val F1 = 0.7399

🔁 Fold 4/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler(enabled=use_amp)
  with autocast(enabled=use_amp):
[level3_fold4] Epoch 1: 100%|██████████| 293/293 [00:42<00:00,  6.86it/s]
  with autocast(enabled=use_amp):


📊 Epoch 1: Train Loss = 271.6054 | Train Acc = 0.4528 | Train F1 = 0.4219 | Val Acc = 0.5449 | Val F1 = 0.3844
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 2: 100%|██████████| 293/293 [00:43<00:00,  6.74it/s]
  with autocast(enabled=use_amp):


📊 Epoch 2: Train Loss = 268.1300 | Train Acc = 0.5442 | Train F1 = 0.3958 | Val Acc = 0.5449 | Val F1 = 0.3844


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 3: 100%|██████████| 293/293 [00:42<00:00,  6.84it/s]
  with autocast(enabled=use_amp):


📊 Epoch 3: Train Loss = 266.2803 | Train Acc = 0.5622 | Train F1 = 0.4598 | Val Acc = 0.6595 | Val F1 = 0.5948
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 4: 100%|██████████| 293/293 [00:47<00:00,  6.16it/s]
  with autocast(enabled=use_amp):


📊 Epoch 4: Train Loss = 242.2841 | Train Acc = 0.7204 | Train F1 = 0.6713 | Val Acc = 0.7784 | Val F1 = 0.7289
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 5: 100%|██████████| 293/293 [00:46<00:00,  6.34it/s]
  with autocast(enabled=use_amp):


📊 Epoch 5: Train Loss = 226.7863 | Train Acc = 0.7700 | Train F1 = 0.7265 | Val Acc = 0.7887 | Val F1 = 0.7351
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 6: 100%|██████████| 293/293 [00:45<00:00,  6.38it/s]
  with autocast(enabled=use_amp):


📊 Epoch 6: Train Loss = 221.0464 | Train Acc = 0.7926 | Train F1 = 0.7539 | Val Acc = 0.7913 | Val F1 = 0.7427
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 7: 100%|██████████| 293/293 [00:46<00:00,  6.31it/s]
  with autocast(enabled=use_amp):


📊 Epoch 7: Train Loss = 218.9074 | Train Acc = 0.7950 | Train F1 = 0.7571 | Val Acc = 0.7956 | Val F1 = 0.7553
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold4.pth


  with autocast(enabled=use_amp):
[level3_fold4] Epoch 8: 100%|██████████| 293/293 [00:46<00:00,  6.31it/s]
  with autocast(enabled=use_amp):


📊 Epoch 8: Train Loss = 217.8447 | Train Acc = 0.8021 | Train F1 = 0.7697 | Val Acc = 0.7947 | Val F1 = 0.7550

🔁 Fold 5/5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = GradScaler(enabled=use_amp)
  with autocast(enabled=use_amp):
[level3_fold5] Epoch 1: 100%|██████████| 293/293 [00:42<00:00,  6.81it/s]
  with autocast(enabled=use_amp):


📊 Epoch 1: Train Loss = 271.6637 | Train Acc = 0.4590 | Train F1 = 0.4282 | Val Acc = 0.5449 | Val F1 = 0.3844
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 2: 100%|██████████| 293/293 [00:43<00:00,  6.70it/s]
  with autocast(enabled=use_amp):


📊 Epoch 2: Train Loss = 266.6303 | Train Acc = 0.5552 | Train F1 = 0.4313 | Val Acc = 0.6048 | Val F1 = 0.5280
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 3: 100%|██████████| 293/293 [00:45<00:00,  6.43it/s]
  with autocast(enabled=use_amp):


📊 Epoch 3: Train Loss = 238.7289 | Train Acc = 0.7347 | Train F1 = 0.6902 | Val Acc = 0.7759 | Val F1 = 0.7238
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 4: 100%|██████████| 293/293 [00:46<00:00,  6.37it/s]
  with autocast(enabled=use_amp):


📊 Epoch 4: Train Loss = 225.6900 | Train Acc = 0.7792 | Train F1 = 0.7421 | Val Acc = 0.7827 | Val F1 = 0.7293
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 5: 100%|██████████| 293/293 [00:45<00:00,  6.37it/s]
  with autocast(enabled=use_amp):


📊 Epoch 5: Train Loss = 219.2373 | Train Acc = 0.8046 | Train F1 = 0.7760 | Val Acc = 0.8067 | Val F1 = 0.7838
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 6: 100%|██████████| 293/293 [00:46<00:00,  6.33it/s]
  with autocast(enabled=use_amp):


📊 Epoch 6: Train Loss = 214.7002 | Train Acc = 0.8275 | Train F1 = 0.8109 | Val Acc = 0.8135 | Val F1 = 0.7941
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 7: 100%|██████████| 293/293 [00:45<00:00,  6.44it/s]
  with autocast(enabled=use_amp):


📊 Epoch 7: Train Loss = 211.0344 | Train Acc = 0.8369 | Train F1 = 0.8227 | Val Acc = 0.8221 | Val F1 = 0.8112
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


  with autocast(enabled=use_amp):
[level3_fold5] Epoch 8: 100%|██████████| 293/293 [00:45<00:00,  6.41it/s]
  with autocast(enabled=use_amp):


📊 Epoch 8: Train Loss = 211.2309 | Train Acc = 0.8452 | Train F1 = 0.8358 | Val Acc = 0.8221 | Val F1 = 0.8131
💾 Model saved at: /content/drive/MyDrive/FIRE/run_20250629_121221/models/level3_fold5.pth


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (5,) + inhomogeneous part.

In [None]:
# === CONFIRMED LEVEL 3 INFERENCE & EVALUATION SCRIPT ===
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score
)
from scipy.stats import mode
from transformers import DebertaV2ForSequenceClassification, AutoTokenizer
import pickle

# === CONFIG ===
# The fold checkpoints and the label encoder share one training-run directory.
run_dir = "/content/drive/MyDrive/FIRE/run_20250629_121221"
# One checkpoint per cross-validation fold, folds numbered 1 through 5.
model_paths = [
    f"{run_dir}/models/level3_fold{fold_no}.pth" for fold_no in range(1, 6)
]
model_name = "microsoft/deberta-v3-small"
label_encoder_path = f"{run_dir}/encoders/label_encoder_level_3.pkl"
val_csv_path = "/content/drive/MyDrive/FIRE/crypto_task1_val.csv"
save_dir = "/content/drive/MyDrive/FIRE/outputs/ensemble_level3_eval"
# Create the output directory up front; an already existing one is fine.
os.makedirs(save_dir, exist_ok=True)

# === Load label encoder ===
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only point label_encoder_path at an encoder written by your own training run.
with open(label_encoder_path, "rb") as f:
    le3 = pickle.load(f)

# === Load and preprocess validation data ===
val_df = pd.read_csv(val_csv_path)
print("Columns in validation CSV:", val_df.columns.tolist())

# Only NEUTRAL under SUBJECTIVE are valid for Level 3
# (assumes level_1 == 2 / level_2 == 0 are those encoded classes — confirm
# against the training notebook). Filtering happens first so that rows which
# are never evaluated cannot trip the source check below.
val_df = val_df[(val_df['level_1'] == 2) & (val_df['level_2'] == 0)].copy()

# Prefix each text with a marker token for the platform it came from.
source_to_token = {
    'REDDIT': '[REDDIT]',
    'TWITTER': '[TWITTER]',
    'YOUTUBE': '[YOUTUBE]'
}
val_df['source_token'] = val_df['source'].str.upper().map(source_to_token)

# Series.map yields NaN for any value missing from the mapping; that NaN would
# propagate into 'text' and only surface later as an opaque tokenizer error,
# so fail here with the offending values instead.
unmapped = val_df['source_token'].isna()
if unmapped.any():
    bad_sources = sorted(val_df.loc[unmapped, 'source'].astype(str).unique())
    raise ValueError(
        f"Unrecognised 'source' values in validation data: {bad_sources}"
    )
val_df['text'] = val_df['source_token'] + ' ' + val_df['text']

# Encode the gold Level 3 labels with the encoder loaded above.
val_df["level_3_enc"] = le3.transform(val_df["level_3"])
true_labels = val_df["level_3_enc"].values

# Tokenize
# The whole validation split is encoded in a single call, with truncation
# enabled at max_length=128 and padding enabled, returned as PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained(model_name)
val_texts = val_df['text'].tolist()
encodings = tokenizer(
    val_texts,
    max_length=128,
    truncation=True,
    padding=True,
    return_tensors='pt',
)

# Bundle input ids, attention mask and gold labels so they stay aligned per row.
labels = torch.tensor(true_labels)
val_dataset = torch.utils.data.TensorDataset(
    encodings['input_ids'],
    encodings['attention_mask'],
    labels,
)
# No shuffling is requested, so batches follow the row order of val_df.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)

# === Inference ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
all_fold_preds = []  # one array of predicted class ids per fold checkpoint

for fold, model_path in enumerate(model_paths):
    fold_no = fold + 1
    print(f"\nFold {fold_no} — loading model")

    # Instantiate the architecture with one output per encoder class, then
    # load this fold's saved state dict into it.
    model = DebertaV2ForSequenceClassification.from_pretrained(
        model_name, num_labels=len(le3.classes_)
    )
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    fold_preds = []
    progress = tqdm(val_loader, desc=f"Predicting Fold {fold_no}")
    with torch.no_grad():
        # The third tensor of each batch holds the gold labels, unused here.
        for input_ids, attention_mask, _ in progress:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            # Highest-scoring class per sample, moved back to host memory.
            fold_preds.extend(logits.argmax(dim=1).cpu().numpy())

    all_fold_preds.append(np.array(fold_preds))

# === Majority Voting ===
# Stack the per-fold predictions into one (n_folds, n_samples) matrix and take
# the most frequent class id per sample. When folds tie, scipy's mode returns
# only one of the tied classes.
fold_matrix = np.stack(all_fold_preds)
# np.ravel (instead of squeeze) keeps the result 1-D even for a single
# validation sample, and whether or not this SciPy version keeps the reduced
# axis in the mode result; a 0-d array would break inverse_transform and the
# column assignment below.
ensemble_preds = np.ravel(mode(fold_matrix, axis=0).mode)
pred_labels = le3.inverse_transform(ensemble_preds)

# Save predictions
# Both the encoded prediction and its decoded label are stored next to the
# original validation rows.
val_df["preds"] = ensemble_preds
val_df["pred_labels"] = pred_labels
val_df.to_csv(os.path.join(save_dir, "level3_val_predictions.csv"), index=False)

# === Overall Metrics ===
# zero_division=0 produces the same scores as scikit-learn's default ("warn"
# also scores undefined cases as 0) but without emitting an
# UndefinedMetricWarning for every class that is never predicted.
class_ids = list(range(len(le3.classes_)))
class_names = [str(c) for c in le3.classes_]

acc = accuracy_score(true_labels, ensemble_preds)
f1_weighted = f1_score(true_labels, ensemble_preds, average='weighted', zero_division=0)
f1_macro = f1_score(true_labels, ensemble_preds, average='macro', zero_division=0)
f1_micro = f1_score(true_labels, ensemble_preds, average='micro', zero_division=0)
prec_macro = precision_score(true_labels, ensemble_preds, average='macro', zero_division=0)
recall_macro = recall_score(true_labels, ensemble_preds, average='macro', zero_division=0)

try:
    # Report every class known to the encoder, including ones absent from
    # this validation slice.
    report = classification_report(
        true_labels,
        ensemble_preds,
        labels=class_ids,
        target_names=class_names,
        digits=4,
        zero_division=0,
    )
except Exception as e:
    # Deliberate best-effort: fall back to a report over only the labels that
    # actually occur, so the metrics file is still written.
    print("Error generating classification report:", e)
    report = classification_report(true_labels, ensemble_preds, digits=4, zero_division=0)

metrics_text = f"""
Ensemble Accuracy: {acc:.4f}
F1 (Weighted): {f1_weighted:.4f}
F1 (Macro):    {f1_macro:.4f}
F1 (Micro):    {f1_micro:.4f}
Precision (Macro): {prec_macro:.4f}
Recall (Macro):    {recall_macro:.4f}

Classification Report:
{report}
"""

print(metrics_text)
# Explicit encoding so the file content does not depend on the platform locale.
with open(os.path.join(save_dir, "metrics.txt"), "w", encoding="utf-8") as f:
    f.write(metrics_text)

# === Confusion Matrix ===
# Pin rows/columns to the encoder's full class list so the matrix always has
# one row and column per class, in the same order as the classification
# report, even when a class is absent from both the gold labels and the
# predictions.
class_ids = list(range(len(le3.classes_)))
class_names = [str(c) for c in le3.classes_]
cm = confusion_matrix(true_labels, ensemble_preds, labels=class_ids)

plt.figure(figsize=(6, 5))
# Label the axes with the class names instead of positional indices.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix (Level 3 Ensemble)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(os.path.join(save_dir, "confusion_matrix.png"))
# Close the figure so it is released once it has been written to disk.
plt.close()

# === Platform-wise Evaluation
# Repeats the headline metrics separately for each source platform.
print("\nPlatform-wise Evaluation:")
platforms = ["youtube", "reddit", "twitter"]

class_ids = list(range(len(le3.classes_)))
class_names = [str(c) for c in le3.classes_]

# Loop-invariant work hoisted out of the per-platform loop.
source_lower = val_df['source'].str.lower()
y_true_all = val_df["level_3_enc"].to_numpy()
y_pred_all = val_df["preds"].to_numpy()

for platform in platforms:
    # Plain boolean array, so the selection is positional and does not depend
    # on the DataFrame's (non-contiguous, post-filter) index.
    mask = (source_lower == platform).to_numpy()
    y_true = y_true_all[mask]
    y_pred = y_pred_all[mask]

    print(f"\nPlatform: {platform.upper()}")
    if len(y_true) == 0:
        print(f"No samples for {platform.upper()}. Skipping.")
        continue

    # zero_division=0 gives the same scores as the default "warn" setting but
    # without an UndefinedMetricWarning for each class that a platform never
    # contains or never gets predicted.
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(f"F1 Weighted: {f1_score(y_true, y_pred, average='weighted', zero_division=0):.4f}")
    print(f"F1 Macro:    {f1_score(y_true, y_pred, average='macro', zero_division=0):.4f}")

    try:
        platform_report = classification_report(
            y_true, y_pred,
            labels=class_ids,
            target_names=class_names,
            digits=4,
            zero_division=0,
        )
    except Exception as e:
        # Deliberate best-effort: fall back to a report over only the labels
        # present for this platform.
        print("Error generating report:", e)
        platform_report = classification_report(y_true, y_pred, digits=4, zero_division=0)

    print(f"Platform Report:\n{platform_report}")

print(f"\nAll outputs saved to: {save_dir}")


✅ Columns in validation CSV: ['text', 'level_1', 'level_2', 'level_3', 'source']





🔁 Fold 1 — loading model


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting Fold 1: 100%|██████████| 20/20 [00:02<00:00,  7.48it/s]



🔁 Fold 2 — loading model


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting Fold 2: 100%|██████████| 20/20 [00:02<00:00,  7.51it/s]



🔁 Fold 3 — loading model


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting Fold 3: 100%|██████████| 20/20 [00:02<00:00,  7.55it/s]



🔁 Fold 4 — loading model


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting Fold 4: 100%|██████████| 20/20 [00:02<00:00,  7.40it/s]



🔁 Fold 5 — loading model


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting Fold 5: 100%|██████████| 20/20 [00:02<00:00,  7.40it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



✅ Ensemble Accuracy: 0.7598
🎯 F1 (Weighted): 0.7099
📏 F1 (Macro):    0.4122
📐 F1 (Micro):    0.7598
🎯 Precision (Macro): 0.6279
📌 Recall (Macro):    0.4531

📋 Classification Report:
              precision    recall  f1-score   support

         0.0     0.5916    0.9200    0.7202       200
         1.0     0.9200    0.8768    0.8979       341
         2.0     1.0000    0.0156    0.0308        64
         3.0     0.0000    0.0000    0.0000        32

    accuracy                         0.7598       637
   macro avg     0.6279    0.4531    0.4122       637
weighted avg     0.7787    0.7598    0.7099       637



🔎 Platform-wise Evaluation:

📦 Platform: YOUTUBE
Accuracy: 0.8898
F1 Weighted: 0.8829
F1 Macro:    0.5951
Platform Report:
              precision    recall  f1-score   support

         0.0     0.8291    1.0000    0.9066       131
         1.0     1.0000    0.7838    0.8788       111
         2.0     0.0000    0.0000    0.0000         0
         3.0     0.0000    0.0000    0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize