In [42]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tqdm import tqdm
from pathlib import Path


import warnings
from sklearn.exceptions import UndefinedMetricWarning

# Suppress scikit-learn's UndefinedMetricWarning for the whole kernel session.
# NOTE(review): presumably emitted by f1_score in evaluate() when a class or
# sample has no positive labels/predictions — confirm before relying on it.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# ==========================================================
# CONFIGURATION
# ==========================================================
# Fix the NumPy and PyTorch RNG seeds so that weight initialisation, input
# noise and DataLoader shuffling are repeatable from a fresh kernel.
np.random.seed(42)
torch.manual_seed(42)

# Plain string device identifier; every tensor/model below is moved with .to(device).
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🔧 Device: {device}")

# Paths (all relative to the notebook's working directory)
ROOT = Path("Amazon_products")
TRAIN_CORPUS_PATH = ROOT / "train" / "train_corpus.txt"
TEST_CORPUS_PATH  = ROOT / "test" / "test_corpus.txt"
CLASS_PATH        = ROOT / "classes.txt"

EMB_DIR          = Path("Embeddings")
X_ALL_PATH       = EMB_DIR / "X_train_test_mpn.pt"
LABEL_EMB_PATH   = EMB_DIR / "labels_base_new_mpn.pt"

# Output directory for trained weights; parents=True keeps this working if the
# path is later changed to a nested location.
MODEL_SAVE = Path("Models")
MODEL_SAVE.mkdir(parents=True, exist_ok=True)
MODEL_PATH = MODEL_SAVE / "silver_classifier.pt"

# ==========================================================
# LOAD IDS
# ==========================================================
def load_ids(path):
    """Read the integer product ids from a tab-separated corpus file.

    Each non-blank line is expected to look like ``<id>\\t<text>``. Only the id
    (everything before the first tab) is used; the text is ignored.

    Args:
        path: Path of the corpus file (UTF-8).

    Returns:
        list[int]: The ids in file order.

    Raises:
        ValueError: If the id field of a line is not an integer. The message
            carries the file path and 1-based line number.
    """
    ids = []
    with open(path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            # Strip only the line terminator: stripping all whitespace would
            # also eat the tab of a record whose text is empty ("12\t").
            record = line.rstrip("\r\n")
            if not record.strip():
                continue  # blank line (e.g. trailing newline at end of file)
            pid, _, _ = record.partition("\t")
            try:
                ids.append(int(pid))
            except ValueError as exc:
                raise ValueError(
                    f"{path}:{line_no}: invalid product id {pid!r}"
                ) from exc
    return ids

# Product ids for both splits, in corpus-file order. The counts are used
# further down to validate and slice the stacked embedding matrix.
train_ids, test_ids = (
    load_ids(corpus) for corpus in (TRAIN_CORPUS_PATH, TEST_CORPUS_PATH)
)
n_train, n_test = len(train_ids), len(test_ids)

print(f"Train IDs: {n_train} | Test IDs: {n_test}")

# ==========================================================
# LOAD SILVER LABELS
# ==========================================================
# Silver (automatically generated) labels: a JSON object keyed by product id
# (as a string) whose values each carry a "labels" entry.
with open("Silver/silver_train_new_mpn_nohier.json", "r", encoding="utf-8") as f:
    raw = json.load(f)

# Re-key by integer product id and keep only the label collection.
silver_labels = dict(
    (int(key), entry["labels"]) for key, entry in raw.items()
)

# ==========================================================
# LOAD X_all
# ==========================================================
def _load_float_tensor(path):
    """Load a saved embedding matrix and return it as a float32 tensor on `device`.

    Accepts a file whose payload is a tensor, a NumPy array, or a list/tuple of
    per-row tensors/arrays (stacked along a new first dimension).
    """
    # NOTE(security): weights_only=False unpickles arbitrary Python objects and
    # can execute code embedded in the file. Only load files you produced
    # yourself; switch to weights_only=True once the files hold plain tensors.
    # map_location="cpu" lets files saved from a GPU session load on CPU-only
    # machines; the tensor is moved to `device` explicitly below.
    payload = torch.load(path, map_location="cpu", weights_only=False)

    if isinstance(payload, np.ndarray):
        payload = torch.from_numpy(payload)
    elif isinstance(payload, (list, tuple)):
        payload = torch.stack([torch.as_tensor(row) for row in payload])

    return payload.float().to(device)


print("\n🧠 Loading X_all.pt ...")
X_all = _load_float_tensor(X_ALL_PATH)

# Rows must be the train documents followed by the test documents, one row
# per id read from the corpus files.
assert X_all.shape[0] == n_train + n_test, (
    f"X_all has {X_all.shape[0]} rows but the corpora list "
    f"{n_train} + {n_test} = {n_train + n_test} ids"
)

X_train = X_all[:n_train]
X_test  = X_all[n_train:]
print(f"✓ X_train: {X_train.shape} | X_test: {X_test.shape}")

# ==========================================================
# LOAD LABEL EMBEDDINGS
# ==========================================================
# Same loader as above, so NumPy / list payloads are handled consistently.
label_emb = _load_float_tensor(LABEL_EMB_PATH)
print(f"✓ Label embeddings: {label_emb.shape}")

# ==========================================================
# LOAD CLASS NAMES
# ==========================================================
# Class id -> class name, read from a "<id>\t<name>" file.
classes = {}
with open(CLASS_PATH, "r", encoding="utf-8") as f:
    for line in f:
        record = line.rstrip("\r\n")
        if not record.strip():
            continue  # tolerate blank lines / trailing newline
        # maxsplit=1 so a tab inside the class name cannot break the unpack.
        cid, cname = record.split("\t", 1)
        classes[int(cid)] = cname.strip()

n_classes = len(classes)

# The classifier emits one logit per label-embedding row while targets are
# built with n_classes columns; fail here with a clear message rather than
# with a shape error inside the loss.
assert label_emb.shape[0] == n_classes, (
    f"{label_emb.shape[0]} label embeddings but {n_classes} classes in {CLASS_PATH}"
)

# Product id -> row index into X_train (ids are in the same order as the rows).
pid2idx = {pid: i for i, pid in enumerate(train_ids)}


🔧 Device: cuda
Train IDs: 29487 | Test IDs: 19658

🧠 Loading X_all.pt ...
✓ X_train: torch.Size([29487, 768]) | X_test: torch.Size([19658, 768])
✓ Label embeddings: torch.Size([531, 768])


In [43]:
class MultiLabelDataset(Dataset):
    """Map-style dataset yielding (embedding, multi-hot label vector) pairs.

    Args:
        pids: Sequence of product ids that make up this split.
        labels_dict: Mapping ``pid -> iterable of class indices``.
        features: Optional embedding matrix indexed by row. When ``None`` the
            notebook-level ``X_train`` is used (original behaviour).
        pid_to_row: Optional mapping ``pid -> row index`` into ``features``.
            When ``None`` the notebook-level ``pid2idx`` is used.
        num_classes: Optional width of the multi-hot target. When ``None`` the
            notebook-level ``n_classes`` is used.

    The notebook globals are looked up lazily at item-access time, so the
    two-argument construction behaves exactly as before.
    """

    def __init__(self, pids, labels_dict, features=None, pid_to_row=None, num_classes=None):
        self.pids = pids
        self.labels = labels_dict
        self.features = features
        self.pid_to_row = pid_to_row
        self.num_classes = num_classes

    def __len__(self):
        """Number of products in this split."""
        return len(self.pids)

    def __getitem__(self, idx):
        """Return ``{"X": embedding row, "y": float multi-hot vector}`` for item ``idx``."""
        feats = X_train if self.features is None else self.features
        rows = pid2idx if self.pid_to_row is None else self.pid_to_row
        width = n_classes if self.num_classes is None else self.num_classes

        pid = self.pids[idx]
        emb = feats[rows[pid]]

        # Targets are created on CPU; the training loop moves them to the device.
        y = torch.zeros(width)
        for c in self.labels[pid]:
            # Indices outside [0, width) are ignored rather than raising.
            if 0 <= c < width:
                y[c] = 1.0

        return {"X": emb, "y": y}


In [44]:
# Hold out 20% of the silver-labelled products for validation (fixed seed).
train_p, val_p = train_test_split(
    list(silver_labels),
    test_size=0.2,
    random_state=42,
)

# One dataset per split, both backed by the same label dictionary.
train_dataset, val_dataset = (
    MultiLabelDataset(split_pids, silver_labels) for split_pids in (train_p, val_p)
)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64)

In [45]:
class InnerProductClassifier(nn.Module):
    """Score each class as the inner product of a projected input and a label embedding.

    Args:
        input_dim: Size of the last dimension of the input features.
        label_embeddings: 2-D tensor with one row per class; its second
            dimension sets the projection width.
        dropout: Dropout probability applied to the raw input.
        trainable_label_emb: If True the (cloned) label embeddings become a
            learnable parameter; otherwise they are stored as a buffer.
    """

    def __init__(self, input_dim, label_embeddings, dropout=0.2, trainable_label_emb=False):
        super().__init__()

        embed_dim = label_embeddings.shape[1]

        self.proj = nn.Linear(input_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

        # Clone so the module never aliases the caller's tensor.
        label_copy = label_embeddings.clone()
        if not trainable_label_emb:
            self.register_buffer("label_emb", label_copy)
        else:
            self.label_emb = nn.Parameter(label_copy)

    def forward(self, x, use_dropout=True):
        """Return logits of shape (batch, n_labels).

        ``use_dropout=False`` bypasses the dropout layer entirely; with the
        default, dropout still only has an effect in training mode.
        """
        hidden = self.dropout(x) if use_dropout else x
        projected = self.proj(hidden)                    # (B, D)
        return projected.matmul(self.label_emb.t())     # (B, C)

In [46]:
def evaluate(model, loader, thr=0.25):
    """Compute sample-averaged and macro-averaged F1 of `model` on `loader`.

    Args:
        model: Module returning logits of shape (batch, n_classes).
        loader: Iterable of batches shaped like ``{"X": tensor, "y": tensor}``.
        thr: Probability threshold above which a class counts as predicted.

    Returns:
        tuple[float, float]: ``(f1_samples, f1_macro)``.

    Raises:
        ValueError: If `loader` yields no batches.

    Side effects:
        Leaves `model` in eval mode.
    """
    model.eval()

    # Run on whatever device the model lives on, so the function also works
    # when the loader yields CPU tensors or the labels sit on the GPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    preds, labels = [], []

    with torch.no_grad():
        for batch in loader:
            X = batch["X"]
            if model_device is not None:
                X = X.to(model_device)
            y = batch["y"].cpu().numpy()

            prob = torch.sigmoid(model(X)).cpu().numpy()
            preds.append((prob > thr).astype(int))
            labels.append(y)

    if not preds:
        raise ValueError("evaluate() received an empty loader")

    # Stack per-batch blocks into (n_samples, n_classes) indicator matrices.
    y_true = np.vstack(labels)
    y_pred = np.vstack(preds)

    f1s = f1_score(y_true, y_pred, average="samples")
    f1m = f1_score(y_true, y_pred, average="macro")
    return f1s, f1m

In [47]:
import copy
print("\n🚀 Training (Mean Teacher Regularization)...")

# ----------------------------
# Hyperparameters
# ----------------------------
epochs = 100            # upper bound on epochs; also the cosine schedule horizon
patience = 5            # epochs without validation improvement before stopping
wait = 0                # epochs elapsed since the last improvement
best_f1 = 0             # best sample-averaged validation F1 seen so far

alpha_ema = 0.995       # teacher EMA decay (share of the old teacher kept per step)
lambda_cons = 1.5       # weight for consistency loss
noise_std = 0.05        # std of Gaussian noise added to the student input

# ----------------------------
# Init student + teacher
# ----------------------------
student = InnerProductClassifier(
    input_dim=X_train.size(1),
    label_embeddings=label_emb,
    dropout=0.2,
    trainable_label_emb=False
).to(device)

# The teacher starts as an exact copy of the student. It is never optimised
# directly (only overwritten by the EMA of the student), so its parameters are
# frozen to make that explicit and avoid building gradient state for them.
teacher = copy.deepcopy(student).to(device)
teacher.requires_grad_(False)

optimizer = torch.optim.AdamW(student.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# Snapshot of the best teacher weights; replaced whenever validation F1 improves.
best_teacher = copy.deepcopy(teacher.state_dict())

# ----------------------------
# Consistency loss
# ----------------------------
def consistency_loss(log_s, log_t):
    """Mean squared error between student and teacher sigmoid probabilities.

    Args:
        log_s: Student logits.
        log_t: Teacher logits (same shape as ``log_s``).

    Returns:
        Scalar tensor: MSE averaged over all elements.
    """
    student_prob, teacher_prob = log_s.sigmoid(), log_t.sigmoid()
    return F.mse_loss(student_prob, teacher_prob)

# ----------------------------
# Training loop
# ----------------------------
for epoch in range(1, epochs + 1):
    student.train()
    teacher.eval()

    total_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch}"):
        X = batch["X"].to(device)
        y = batch["y"].to(device)

        # Student sees a noise-perturbed input; the teacher sees the clean one.
        noisy_X = X + noise_std * torch.randn_like(X)

        # student forward
        logits_s = student(noisy_X)

        # teacher forward (no gradient)
        with torch.no_grad():
            logits_t = teacher(X)

        # supervised = main objective
        loss_sup = F.binary_cross_entropy_with_logits(logits_s, y)

        # consistency = stability objective
        loss_cons = consistency_loss(logits_s, logits_t)

        # total loss
        loss = loss_sup + lambda_cons * loss_cons

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # EMA update (student -> teacher):
        #   teacher = alpha_ema * teacher + (1 - alpha_ema) * student
        # Done in place under no_grad instead of rebinding `.data`, so the
        # teacher's parameter tensors keep their identity.
        with torch.no_grad():
            for t_param, s_param in zip(teacher.parameters(), student.parameters()):
                t_param.mul_(alpha_ema).add_(s_param, alpha=1 - alpha_ema)

        total_loss += loss.item()

    # Cosine schedule advances once per epoch.
    scheduler.step()

    # Model selection is based on the teacher, not the student.
    teacher.eval()
    f1_sample, f1_macro = evaluate(teacher, val_loader)

    print(f"[Epoch {epoch}] Loss={total_loss/len(train_loader):.4f} | F1={f1_sample:.4f}")

    if f1_sample > best_f1:
        best_f1 = f1_sample
        best_teacher = copy.deepcopy(teacher.state_dict())
        # Persist the checkpoint so the message below is true and the best
        # weights survive a kernel restart.
        torch.save(best_teacher, MODEL_PATH)
        wait = 0
        print(f"New best model saved (F1={best_f1:.4f})")
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping triggered")
            break


# Restore the best-scoring teacher weights for downstream inference.
teacher.load_state_dict(best_teacher)
print("\n🎉 Final best F1:", best_f1)



🚀 Training (Mean Teacher Regularization)...


Epoch 1:   0%|          | 0/369 [00:00<?, ?it/s]

Epoch 1: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 189.66it/s]


[Epoch 1] Loss=0.1633 | F1=0.0010
New best model saved (F1=0.0010)


Epoch 2: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 213.04it/s]


[Epoch 2] Loss=0.0396 | F1=0.0001


Epoch 3: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 224.60it/s]


[Epoch 3] Loss=0.0331 | F1=0.0005


Epoch 4: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 237.43it/s]


[Epoch 4] Loss=0.0295 | F1=0.0054
New best model saved (F1=0.0054)


Epoch 5: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 234.62it/s]


[Epoch 5] Loss=0.0273 | F1=0.0319
New best model saved (F1=0.0319)


Epoch 6: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 249.96it/s]


[Epoch 6] Loss=0.0254 | F1=0.0916
New best model saved (F1=0.0916)


Epoch 7: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 263.72it/s]


[Epoch 7] Loss=0.0238 | F1=0.1567
New best model saved (F1=0.1567)


Epoch 8: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 310.85it/s]


[Epoch 8] Loss=0.0227 | F1=0.2187
New best model saved (F1=0.2187)


Epoch 9: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 309.46it/s]


[Epoch 9] Loss=0.0216 | F1=0.2701
New best model saved (F1=0.2701)


Epoch 10: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 317.83it/s]


[Epoch 10] Loss=0.0206 | F1=0.3118
New best model saved (F1=0.3118)


Epoch 11: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 278.27it/s]


[Epoch 11] Loss=0.0198 | F1=0.3453
New best model saved (F1=0.3453)


Epoch 12: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 260.00it/s]


[Epoch 12] Loss=0.0191 | F1=0.3709
New best model saved (F1=0.3709)


Epoch 13: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 235.39it/s]


[Epoch 13] Loss=0.0184 | F1=0.3953
New best model saved (F1=0.3953)


Epoch 14: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 243.40it/s]


[Epoch 14] Loss=0.0179 | F1=0.4140
New best model saved (F1=0.4140)


Epoch 15: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 254.26it/s]


[Epoch 15] Loss=0.0174 | F1=0.4313
New best model saved (F1=0.4313)


Epoch 16: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 294.06it/s]


[Epoch 16] Loss=0.0169 | F1=0.4442
New best model saved (F1=0.4442)


Epoch 17: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 298.79it/s]


[Epoch 17] Loss=0.0165 | F1=0.4549
New best model saved (F1=0.4549)


Epoch 18: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 304.28it/s]


[Epoch 18] Loss=0.0161 | F1=0.4670
New best model saved (F1=0.4670)


Epoch 19: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 304.54it/s]


[Epoch 19] Loss=0.0158 | F1=0.4779
New best model saved (F1=0.4779)


Epoch 20: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 302.65it/s]


[Epoch 20] Loss=0.0156 | F1=0.4839
New best model saved (F1=0.4839)


Epoch 21: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 287.76it/s]


[Epoch 21] Loss=0.0153 | F1=0.4918
New best model saved (F1=0.4918)


Epoch 22: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 310.15it/s]


[Epoch 22] Loss=0.0151 | F1=0.5003
New best model saved (F1=0.5003)


Epoch 23: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 314.77it/s]


[Epoch 23] Loss=0.0149 | F1=0.5086
New best model saved (F1=0.5086)


Epoch 24: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 273.77it/s]


[Epoch 24] Loss=0.0147 | F1=0.5133
New best model saved (F1=0.5133)


Epoch 25: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 283.51it/s]


[Epoch 25] Loss=0.0146 | F1=0.5211
New best model saved (F1=0.5211)


Epoch 26: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 301.40it/s]


[Epoch 26] Loss=0.0144 | F1=0.5253
New best model saved (F1=0.5253)


Epoch 27: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 295.75it/s]


[Epoch 27] Loss=0.0142 | F1=0.5299
New best model saved (F1=0.5299)


Epoch 28: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 271.57it/s]


[Epoch 28] Loss=0.0141 | F1=0.5344
New best model saved (F1=0.5344)


Epoch 29: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 267.25it/s]


[Epoch 29] Loss=0.0139 | F1=0.5385
New best model saved (F1=0.5385)


Epoch 30: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 243.45it/s]


[Epoch 30] Loss=0.0139 | F1=0.5431
New best model saved (F1=0.5431)


Epoch 31: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 278.12it/s]


[Epoch 31] Loss=0.0138 | F1=0.5465
New best model saved (F1=0.5465)


Epoch 32: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 266.20it/s]


[Epoch 32] Loss=0.0137 | F1=0.5496
New best model saved (F1=0.5496)


Epoch 33: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 267.28it/s]


[Epoch 33] Loss=0.0136 | F1=0.5519
New best model saved (F1=0.5519)


Epoch 34: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 256.25it/s]


[Epoch 34] Loss=0.0135 | F1=0.5556
New best model saved (F1=0.5556)


Epoch 35: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 201.85it/s]


[Epoch 35] Loss=0.0134 | F1=0.5562
New best model saved (F1=0.5562)


Epoch 36: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 229.29it/s]


[Epoch 36] Loss=0.0134 | F1=0.5586
New best model saved (F1=0.5586)


Epoch 37: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 175.82it/s]


[Epoch 37] Loss=0.0133 | F1=0.5608
New best model saved (F1=0.5608)


Epoch 38: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 219.00it/s]


[Epoch 38] Loss=0.0132 | F1=0.5624
New best model saved (F1=0.5624)


Epoch 39: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 206.60it/s]


[Epoch 39] Loss=0.0132 | F1=0.5650
New best model saved (F1=0.5650)


Epoch 40: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 179.53it/s]


[Epoch 40] Loss=0.0131 | F1=0.5673
New best model saved (F1=0.5673)


Epoch 41: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 221.18it/s]


[Epoch 41] Loss=0.0131 | F1=0.5684
New best model saved (F1=0.5684)


Epoch 42: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 205.78it/s]


[Epoch 42] Loss=0.0130 | F1=0.5706
New best model saved (F1=0.5706)


Epoch 43: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 202.32it/s]


[Epoch 43] Loss=0.0130 | F1=0.5711
New best model saved (F1=0.5711)


Epoch 44: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 230.27it/s]


[Epoch 44] Loss=0.0129 | F1=0.5727
New best model saved (F1=0.5727)


Epoch 45: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 197.82it/s]


[Epoch 45] Loss=0.0129 | F1=0.5743
New best model saved (F1=0.5743)


Epoch 46: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 251.61it/s]


[Epoch 46] Loss=0.0129 | F1=0.5747
New best model saved (F1=0.5747)


Epoch 47: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 234.16it/s]


[Epoch 47] Loss=0.0128 | F1=0.5763
New best model saved (F1=0.5763)


Epoch 48: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 233.56it/s]


[Epoch 48] Loss=0.0128 | F1=0.5774
New best model saved (F1=0.5774)


Epoch 49: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 246.11it/s]


[Epoch 49] Loss=0.0128 | F1=0.5783
New best model saved (F1=0.5783)


Epoch 50: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 215.59it/s]


[Epoch 50] Loss=0.0127 | F1=0.5791
New best model saved (F1=0.5791)


Epoch 51: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 218.63it/s]


[Epoch 51] Loss=0.0127 | F1=0.5803
New best model saved (F1=0.5803)


Epoch 52: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 201.12it/s]


[Epoch 52] Loss=0.0127 | F1=0.5806
New best model saved (F1=0.5806)


Epoch 53: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 221.74it/s]


[Epoch 53] Loss=0.0126 | F1=0.5820
New best model saved (F1=0.5820)


Epoch 54: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 224.22it/s]


[Epoch 54] Loss=0.0126 | F1=0.5832
New best model saved (F1=0.5832)


Epoch 55: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 190.49it/s]


[Epoch 55] Loss=0.0126 | F1=0.5837
New best model saved (F1=0.5837)


Epoch 56: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 191.43it/s]


[Epoch 56] Loss=0.0126 | F1=0.5838
New best model saved (F1=0.5838)


Epoch 57: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 210.97it/s]


[Epoch 57] Loss=0.0126 | F1=0.5846
New best model saved (F1=0.5846)


Epoch 58: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 242.12it/s]


[Epoch 58] Loss=0.0126 | F1=0.5857
New best model saved (F1=0.5857)


Epoch 59: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 217.68it/s]


[Epoch 59] Loss=0.0125 | F1=0.5860
New best model saved (F1=0.5860)


Epoch 60: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 220.82it/s]


[Epoch 60] Loss=0.0125 | F1=0.5856


Epoch 61: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 227.52it/s]


[Epoch 61] Loss=0.0125 | F1=0.5862
New best model saved (F1=0.5862)


Epoch 62: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 237.63it/s]


[Epoch 62] Loss=0.0125 | F1=0.5867
New best model saved (F1=0.5867)


Epoch 63: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 226.16it/s]


[Epoch 63] Loss=0.0125 | F1=0.5869
New best model saved (F1=0.5869)


Epoch 64: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 258.43it/s]


[Epoch 64] Loss=0.0125 | F1=0.5866


Epoch 65: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 254.93it/s]


[Epoch 65] Loss=0.0125 | F1=0.5888
New best model saved (F1=0.5888)


Epoch 66: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 303.44it/s]


[Epoch 66] Loss=0.0124 | F1=0.5884


Epoch 67: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 270.70it/s]


[Epoch 67] Loss=0.0124 | F1=0.5890
New best model saved (F1=0.5890)


Epoch 68: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 277.93it/s]


[Epoch 68] Loss=0.0124 | F1=0.5893
New best model saved (F1=0.5893)


Epoch 69: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 264.03it/s]


[Epoch 69] Loss=0.0124 | F1=0.5888


Epoch 70: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 191.52it/s]


[Epoch 70] Loss=0.0124 | F1=0.5895
New best model saved (F1=0.5895)


Epoch 71: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 145.51it/s]


[Epoch 71] Loss=0.0124 | F1=0.5899
New best model saved (F1=0.5899)


Epoch 72: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 265.62it/s]


[Epoch 72] Loss=0.0124 | F1=0.5899
New best model saved (F1=0.5899)


Epoch 73: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 243.58it/s]


[Epoch 73] Loss=0.0124 | F1=0.5898


Epoch 74: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 249.95it/s]


[Epoch 74] Loss=0.0124 | F1=0.5905
New best model saved (F1=0.5905)


Epoch 75: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 231.79it/s]


[Epoch 75] Loss=0.0123 | F1=0.5910
New best model saved (F1=0.5910)


Epoch 76: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 198.31it/s]


[Epoch 76] Loss=0.0124 | F1=0.5910
New best model saved (F1=0.5910)


Epoch 77: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 188.64it/s]


[Epoch 77] Loss=0.0124 | F1=0.5916
New best model saved (F1=0.5916)


Epoch 78: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 183.58it/s]


[Epoch 78] Loss=0.0124 | F1=0.5920
New best model saved (F1=0.5920)


Epoch 79: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 190.72it/s]


[Epoch 79] Loss=0.0123 | F1=0.5917


Epoch 80: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 236.53it/s]


[Epoch 80] Loss=0.0123 | F1=0.5917


Epoch 81: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 185.14it/s]


[Epoch 81] Loss=0.0123 | F1=0.5926
New best model saved (F1=0.5926)


Epoch 82: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 188.96it/s]


[Epoch 82] Loss=0.0123 | F1=0.5927
New best model saved (F1=0.5927)


Epoch 83: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 165.63it/s]


[Epoch 83] Loss=0.0123 | F1=0.5923


Epoch 84: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 182.75it/s]


[Epoch 84] Loss=0.0123 | F1=0.5920


Epoch 85: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:02<00:00, 174.54it/s]


[Epoch 85] Loss=0.0123 | F1=0.5927
New best model saved (F1=0.5927)


Epoch 86: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 270.63it/s]


[Epoch 86] Loss=0.0123 | F1=0.5924


Epoch 87: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 268.70it/s]


[Epoch 87] Loss=0.0123 | F1=0.5933
New best model saved (F1=0.5933)


Epoch 88: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 259.41it/s]


[Epoch 88] Loss=0.0123 | F1=0.5933
New best model saved (F1=0.5933)


Epoch 89: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 286.36it/s]


[Epoch 89] Loss=0.0123 | F1=0.5934
New best model saved (F1=0.5934)


Epoch 90: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 263.17it/s]


[Epoch 90] Loss=0.0123 | F1=0.5933


Epoch 91: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 265.98it/s]


[Epoch 91] Loss=0.0123 | F1=0.5934
New best model saved (F1=0.5934)


Epoch 92: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 250.95it/s]


[Epoch 92] Loss=0.0123 | F1=0.5930


Epoch 93: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 259.95it/s]


[Epoch 93] Loss=0.0123 | F1=0.5931


Epoch 94: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 308.96it/s]


[Epoch 94] Loss=0.0123 | F1=0.5931


Epoch 95: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 310.64it/s]


[Epoch 95] Loss=0.0123 | F1=0.5932


Epoch 96: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 369/369 [00:01<00:00, 266.47it/s]


[Epoch 96] Loss=0.0123 | F1=0.5929
Early stopping triggered

🎉 Final best F1: 0.5933874544505268


In [None]:
import csv
import numpy as np
from pathlib import Path

print("\nGenerating submission...")

# Put the teacher in inference mode (eval() is shorthand for train(False)).
teacher.train(False)

# Make sure the test embeddings live on the same device as the model.
X_test = X_test.to(device=device)

def select_k(prob, min_k=2, max_k=3, ratio=0.25):
    """Pick between `min_k` and `max_k` class indices by descending probability.

    The `min_k` most probable classes are always kept. Each further class (up
    to `max_k`) is kept only while its probability is at least `ratio` times
    the probability of the class ranked just above it. With the defaults this
    is: take the top 3, but drop the third if it is below 25% of the second.

    Args:
        prob: 1-D array of per-class probabilities.
        min_k: Minimum number of labels to return (>= 1).
        max_k: Maximum number of labels to return (>= min_k).
        ratio: Relative drop-off threshold between consecutive ranks.

    Returns:
        np.ndarray: Selected class indices, most probable first. Fewer than
        `min_k` are returned only when `prob` has fewer entries than that.

    Raises:
        ValueError: If not ``1 <= min_k <= max_k``.
    """
    if not 1 <= min_k <= max_k:
        raise ValueError(f"need 1 <= min_k <= max_k, got min_k={min_k}, max_k={max_k}")

    idx = np.argsort(prob)[::-1]  # descending
    limit = min(max_k, len(idx))  # never index past the available classes
    k = min(min_k, limit)

    while k < limit:
        # Stop at the first rank that falls below `ratio` of its predecessor.
        if prob[idx[k]] < ratio * prob[idx[k - 1]]:
            break
        k += 1

    return idx[:k]


# One list of class-id strings per test product, in X_test row order.
preds = []

with torch.no_grad():
    for start in tqdm(range(0, len(X_test), 64)):
        # Score a slice of 64 rows; dropout is bypassed explicitly.
        probs = torch.sigmoid(
            teacher(X_test[start:start+64], use_dropout=False)
        ).cpu().numpy()

        # select_k returns class indices; store them as strings for the CSV.
        preds.extend([str(x) for x in select_k(p)] for p in probs)


# ==========================================================
# SAVE CSV
# ==========================================================

OUT_DIR = Path("Submission")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_PATH = OUT_DIR / "submission2_inner.csv"

# zip() would silently truncate to the shorter sequence; refuse to write an
# incomplete or misaligned submission instead.
if len(preds) != len(test_ids):
    raise ValueError(
        f"{len(preds)} predictions for {len(test_ids)} test ids; refusing to write {OUT_PATH}"
    )

# newline="" lets the csv module control line endings itself.
with open(OUT_PATH, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["id", "label"])
    # One row per test product: its id and the comma-joined predicted class ids.
    w.writerows((pid, ",".join(labels)) for pid, labels in zip(test_ids, preds))

print(f"🎉 Submission saved → {OUT_PATH}")



Generating submission...


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 308/308 [00:00<00:00, 965.47it/s]

🎉 Submission saved → Submission\submission2_inner.csv





: 