In [25]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tqdm import tqdm
from pathlib import Path

import warnings
from sklearn.exceptions import UndefinedMetricWarning

warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# ==========================================================
# CONFIGURATION
# ==========================================================
np.random.seed(42)
torch.manual_seed(42)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üîß Device: {device}")

# Paths
ROOT = Path("Amazon_products")
TRAIN_CORPUS_PATH = ROOT / "train" / "train_corpus.txt"
TEST_CORPUS_PATH  = ROOT / "test" / "test_corpus.txt"
CLASS_PATH        = ROOT / "classes.txt"

EMB_DIR      = Path("Embeddings")
X_ALL_PATH   = EMB_DIR / "X_train_test_mpn.pt"        # Train + Test embeddings
LABEL_EMB_PATH = EMB_DIR / "labels_hierarchical_new_mpn.pt"

MODEL_SAVE = Path("Models")
MODEL_SAVE.mkdir(exist_ok=True)
MODEL_PATH = MODEL_SAVE / "silver_classifier.pt"

# ==========================================================
# LOAD IDS
# ==========================================================
def load_ids(path):
    ids = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            pid, _ = line.strip().split("\t", 1)
            ids.append(int(pid))
    return ids

train_ids = load_ids(TRAIN_CORPUS_PATH)
test_ids  = load_ids(TEST_CORPUS_PATH)
n_train = len(train_ids)
n_test = len(test_ids)

print(f"Train IDs: {n_train} | Test IDs: {n_test}")

# ==========================================================
# LOAD SILVER LABELS
# ==========================================================
with open("Silver/silver_train_new_mpn.json", "r", encoding="utf-8") as f:
    raw = json.load(f)

silver_labels = {int(pid): data["labels"] for pid, data in raw.items()}

# ==========================================================
# LOAD X_all ‚Üí split into X_train + X_test
# ==========================================================
print("\nüß† Loading X_all.pt ...")

data = torch.load(X_ALL_PATH, weights_only=False)

# ensure tensor
if isinstance(data, np.ndarray):
    data = torch.from_numpy(data)
elif isinstance(data, list):
    data = torch.stack(data)

X_all = data.float().to(device)
assert X_all.shape[0] == n_train + n_test, "Bad size"

X_train = X_all[:n_train]
X_test  = X_all[n_train:]
print(f"‚úì X_train: {X_train.shape}, X_test: {X_test.shape}")

# ==========================================================
# LOAD CLASS NAMES
# ==========================================================
classes = {}
with open(CLASS_PATH, "r", encoding="utf-8") as f:
    for line in f:
        cid, cname = line.strip().split("\t")
        classes[int(cid)] = cname

n_classes = len(classes)

pid2idx = {pid: i for i, pid in enumerate(train_ids)}


üîß Device: cuda
Train IDs: 29487 | Test IDs: 19658

üß† Loading X_all.pt ...
‚úì X_train: torch.Size([29487, 768]), X_test: torch.Size([19658, 768])


In [26]:
class MultiLabelDataset(Dataset):
    def __init__(self, pids, labels_dict):
        self.pids = pids
        self.labels = labels_dict

    def __len__(self):
        return len(self.pids)

    def __getitem__(self, idx):
        pid = self.pids[idx]
        emb = X_train[pid2idx[pid]]

        y = torch.zeros(n_classes)
        for c in self.labels[pid]:
            if 0 <= c < n_classes:
                y[c] = 1.0

        return {"X": emb, "y": y}

# TRAIN / VAL splits
train_p, val_p = train_test_split(list(silver_labels.keys()),
                                  test_size=0.2,
                                  random_state=42)

train_dataset = MultiLabelDataset(train_p, silver_labels)
val_dataset   = MultiLabelDataset(val_p,   silver_labels)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=64)


In [27]:
class Classifier(nn.Module):
    def __init__(self, dim, n_classes):
        super().__init__()
        self.fc1 = nn.Linear(dim, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.drop = nn.Dropout(0.3)
        self.fc2 = nn.Linear(512, n_classes)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.drop(x)
        return self.fc2(x)

model = Classifier(X_train.size(1), n_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4)

In [28]:
def evaluate(model, loader, thr=0.25):
    model.eval()
    preds, labels = [], []

    with torch.no_grad():
        for batch in loader:
            X = batch["X"]
            y = batch["y"].numpy()

            prob = torch.sigmoid(model(X)).cpu().numpy()
            pred = (prob > thr).astype(int)

            preds.extend(pred)
            labels.extend(y)

    f1s = f1_score(labels, preds, average="samples")
    f1m = f1_score(labels, preds, average="macro")
    return f1s, f1m


In [29]:
import copy
from tqdm import tqdm

print("\nüöÄ Training...")

epochs = 100
patience = 5
wait = 0

best_f1 = 0
best_state = None

optimizer = torch.optim.AdamW(
    model.parameters(), 
    lr=3e-4,
    weight_decay=1e-2
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=epochs,
    eta_min=5e-6
)


teacher = copy.deepcopy(model)
for p in teacher.parameters():
    p.requires_grad = False  # teacher = no grad

alpha_ema = 0.995

def ema_update(teacher, student, alpha):
    """teacher = alpha*teacher + (1-alpha)*student"""
    for p_t, p_s in zip(teacher.parameters(), student.parameters()):
        p_t.data.mul_(alpha).add_(p_s.data, alpha=1 - alpha)


for epoch in range(1, epochs+1):
    model.train()
    total_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}"):
        X = batch["X"].to(device)
        y = batch["y"].to(device)

        logits = model(X)
        loss = F.binary_cross_entropy_with_logits(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # EMA update
        ema_update(teacher, model, alpha_ema)

        total_loss += loss.item()

    scheduler.step()

    teacher.eval()
    f1s, f1m = evaluate(teacher, val_loader)

    print(f"[Epoch {epoch}] loss={total_loss/len(train_loader):.4f} | F1={f1s:.4f}")

    if f1s > best_f1:
        best_f1 = f1s
        wait = 0
        best_state = copy.deepcopy(teacher.state_dict())
        torch.save(best_state, MODEL_PATH)
        print(f"New best model saved ({best_f1:.4f})")

    else:
        wait += 1
        if wait >= patience:
            print("\nEarly stopping triggered!")
            break

print(f"\nBest validation F1 = {best_f1:.4f}")
print(f"Model saved at: {MODEL_PATH}")

teacher.load_state_dict(best_state)



üöÄ Training...


Epoch 1/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 186.35it/s]


[Epoch 1] loss=0.0694 | F1=0.0108
New best model saved (0.0108)


Epoch 2/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 200.80it/s]


[Epoch 2] loss=0.0157 | F1=0.0108


Epoch 3/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 202.46it/s]


[Epoch 3] loss=0.0123 | F1=0.0144
New best model saved (0.0144)


Epoch 4/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:02<00:00, 178.28it/s]


[Epoch 4] loss=0.0106 | F1=0.1179
New best model saved (0.1179)


Epoch 5/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:02<00:00, 171.30it/s]


[Epoch 5] loss=0.0094 | F1=0.2901
New best model saved (0.2901)


Epoch 6/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 192.14it/s]


[Epoch 6] loss=0.0086 | F1=0.3166
New best model saved (0.3166)


Epoch 7/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 193.26it/s]


[Epoch 7] loss=0.0079 | F1=0.3214
New best model saved (0.3214)


Epoch 8/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 191.89it/s]


[Epoch 8] loss=0.0074 | F1=0.3295
New best model saved (0.3295)


Epoch 9/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 219.75it/s]


[Epoch 9] loss=0.0069 | F1=0.3351
New best model saved (0.3351)


Epoch 10/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 185.00it/s]


[Epoch 10] loss=0.0065 | F1=0.3408
New best model saved (0.3408)


Epoch 11/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 241.43it/s]


[Epoch 11] loss=0.0062 | F1=0.3461
New best model saved (0.3461)


Epoch 12/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 224.34it/s]


[Epoch 12] loss=0.0059 | F1=0.3498
New best model saved (0.3498)


Epoch 13/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 229.87it/s]


[Epoch 13] loss=0.0056 | F1=0.3524
New best model saved (0.3524)


Epoch 14/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 218.44it/s]


[Epoch 14] loss=0.0054 | F1=0.3580
New best model saved (0.3580)


Epoch 15/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 207.91it/s]


[Epoch 15] loss=0.0051 | F1=0.3645
New best model saved (0.3645)


Epoch 16/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 209.24it/s]


[Epoch 16] loss=0.0049 | F1=0.3725
New best model saved (0.3725)


Epoch 17/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 185.07it/s]


[Epoch 17] loss=0.0047 | F1=0.3759
New best model saved (0.3759)


Epoch 18/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 207.61it/s]


[Epoch 18] loss=0.0046 | F1=0.3841
New best model saved (0.3841)


Epoch 19/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 185.06it/s]


[Epoch 19] loss=0.0044 | F1=0.3899
New best model saved (0.3899)


Epoch 20/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 199.44it/s]


[Epoch 20] loss=0.0042 | F1=0.3932
New best model saved (0.3932)


Epoch 21/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:02<00:00, 182.62it/s]


[Epoch 21] loss=0.0040 | F1=0.3976
New best model saved (0.3976)


Epoch 22/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 211.96it/s]


[Epoch 22] loss=0.0039 | F1=0.3994
New best model saved (0.3994)


Epoch 23/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 227.89it/s]


[Epoch 23] loss=0.0038 | F1=0.3980


Epoch 24/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 213.66it/s]


[Epoch 24] loss=0.0037 | F1=0.3861


Epoch 25/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 197.46it/s]


[Epoch 25] loss=0.0035 | F1=0.3675


Epoch 26/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 217.18it/s]


[Epoch 26] loss=0.0034 | F1=0.3348


Epoch 27/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 369/369 [00:01<00:00, 210.20it/s]


[Epoch 27] loss=0.0033 | F1=0.2957

Early stopping triggered!

Best validation F1 = 0.3994
Model saved at: Models\silver_classifier.pt


<All keys matched successfully>

In [None]:
import csv
import numpy as np
from pathlib import Path
from tqdm import tqdm

print("\nüìù Generating submission...")

test_ids = []
with open(TEST_CORPUS_PATH, "r", encoding="utf-8") as f:
    for line in f:
        pid, _ = line.strip().split("\t", 1)
        test_ids.append(int(pid))

print(f"Loaded test IDs: {len(test_ids)}")

X_all = torch.load(X_ALL_PATH, weights_only=False)

if isinstance(X_all, np.ndarray):
    X_all = torch.from_numpy(X_all)
elif isinstance(X_all, list):
    X_all = torch.stack(X_all)

X_all = X_all.float()

n_test = len(test_ids)
X_test = X_all[-n_test:]

# ==========================================================
# LOAD MODEL
# ==========================================================

teacher.eval()
X_test = X_test.to(device)

# ==========================================================
# SELECTION : top-2 / top-3 like self-training
# ==========================================================
def select_k(prob, min_k=2, max_k=3):
    idx = np.argsort(prob)[::-1]     # tri d√©croissant
    top3 = idx[:max_k]

    # Si le 3eme est trop faible ‚Üí garder seulement 2
    if prob[top3[2]] < 0.25 * prob[top3[1]]:
        return top3[:2]

    return top3


# PREDICTION
preds = []

with torch.no_grad():
    for start in tqdm(range(0, len(X_test), 64)):
        batch = X_test[start:start+64]

        if "use_dropout" in model.forward.__code__.co_varnames:
            logits = model(batch, use_dropout=False)
        else:
            logits = model(batch)

        probs = torch.sigmoid(logits).cpu().numpy()

        for p in probs:
            labels = select_k(p)                     
            preds.append([str(x) for x in labels]) 

# SAVE CSV
OUT_DIR = Path("Submission")
OUT_DIR.mkdir(exist_ok=True)
OUT_PATH = OUT_DIR / "submission_mlp.csv"

with open(OUT_PATH, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["id", "label"])
    for pid, labels in zip(test_ids, preds):
        w.writerow([pid, ",".join(labels)])

print(f"üéâ Submission saved ‚Üí {OUT_PATH}")



üìù Generating submission...
Loaded test IDs: 19658


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 308/308 [00:00<00:00, 679.49it/s]

üéâ Submission saved ‚Üí Submission\submission_mlp.csv





: 