
# AI vs Human Text Detector — End-to-End (Notebook Version)

This notebook trains a lightweight attention-pooling neural network on sentence-level embeddings (100×768), ensembles across K folds, selects the best sentence→paragraph aggregator using the provided validation set, and generates a Kaggle-ready `submission.csv` with columns `id,y_prob`.

**Expected files in the working directory:**
- `train_ai.npy` and `train_human.npy` — shape `(N, 100, 768)`
- `validation.jsonl` — each line: `{"id": ..., "features": [ [100x768], ... ], "label": 0/1}`
- `test_features.jsonl` — each line: `{"id": ..., "features": [ [100x768], ... ]}`

> Tip: If your environment has no internet access, ensure required packages are already installed. The model uses only: `torch`, `numpy`, `pandas`, `scikit-learn`, `tqdm`.


In [None]:

# Optional: Uncomment if you need to install locally (internet access required)
# !pip install torch numpy pandas scikit-learn tqdm


In [1]:

import os, json, math, random
from pathlib import Path
from typing import List, Tuple, Any

import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score

def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def infer_device() -> torch.device:
    return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def exists(path: str) -> bool:
    return Path(path).exists()

def masked_softmax(scores: torch.Tensor, mask: torch.Tensor, dim: int = -1) -> torch.Tensor:
    mask = mask.to(dtype=scores.dtype)
    neg_inf = torch.finfo(scores.dtype).min
    masked_scores = scores.masked_fill(mask == 0, neg_inf)
    return F.softmax(masked_scores, dim=dim)

def make_token_mask(x: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    # x: (B, T, D) -> (B, T) mask 1 for non-zero rows
    return (x.abs().sum(dim=-1) > eps).to(x.dtype)


In [3]:

# ==== Config ====
SEED = 42
FOLDS = 5            # increase for more stable ensemble
EPOCHS = 12          # increase if underfitting
BATCH_SIZE = 128
LR = 3e-4
WEIGHT_DECAY = 1e-4

DIM = 768            # embedding dim
MAX_TOKENS = 100     # tokens per sentence

PATH_TRAIN_AI = "data/train/train_ai.npy"
PATH_TRAIN_HU = "data/train/train_human.npy"
PATH_VAL_JSONL = "data/train/validation.jsonl"
PATH_TEST_JSONL = "data/test/test_features.jsonl"

OUT_MODELS_DIR = Path("models")
OUT_MODELS_DIR.mkdir(exist_ok=True, parents=True)


In [4]:

class SentenceDataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray = None):
        self.X = X.astype(np.float32, copy=False)
        self.y = y.astype(np.float32, copy=False) if y is not None else None

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx: int):
        x = self.X[idx]
        if self.y is None:
            return x
        return x, self.y[idx]

def load_training_arrays(ai_path=PATH_TRAIN_AI, human_path=PATH_TRAIN_HU) -> Tuple[np.ndarray, np.ndarray]:
    assert exists(ai_path), f"Missing {ai_path}"
    assert exists(human_path), f"Missing {human_path}"
    ai = np.load(ai_path)     # (N_ai, 100, 768)
    hu = np.load(human_path)  # (N_h , 100, 768)
    X = np.concatenate([ai, hu], axis=0)
    y = np.concatenate([np.ones(len(ai), dtype=np.float32),
                        np.zeros(len(hu), dtype=np.float32)], axis=0)
    return X, y

def _to_np_sentence(arr_like) -> np.ndarray:
    a = np.array(arr_like, dtype=np.float32)
    if a.ndim != 2:
        a = a.reshape(MAX_TOKENS, -1)
    return a

def load_jsonl_validation(path=PATH_VAL_JSONL):
    ids, feats, labels = [], [], []
    assert exists(path), f"Missing {path}"
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            obj = json.loads(line)
            pid = obj.get("id")
            label = int(obj.get("label"))
            sents = obj.get("features")
            sentence_mats = [_to_np_sentence(s) for s in sents]
            ids.append(pid)
            feats.append(sentence_mats)
            labels.append(label)
    return ids, feats, labels

def load_jsonl_test(path=PATH_TEST_JSONL):
    ids, feats = [], []
    assert exists(path), f"Missing {path}"
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            obj = json.loads(line)
            pid = obj.get("id")
            sents = obj.get("features")
            sentence_mats = [_to_np_sentence(s) for s in sents]
            ids.append(pid)
            feats.append(sentence_mats)
    return ids, feats


In [5]:

class TokenAttention(nn.Module):
    def __init__(self, dim: int, attn_hidden: int = 256, dropout: float = 0.1):
        super().__init__()
        self.proj = nn.Linear(dim, attn_hidden)
        self.score = nn.Linear(attn_hidden, 1, bias=False)
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(dim)

    def forward(self, x: torch.Tensor, mask: torch.Tensor):
        x = self.ln(x)
        h = torch.tanh(self.proj(x))
        h = self.dropout(h)
        scores = self.score(h).squeeze(-1)      # (B, T)
        weights = masked_softmax(scores, mask)  # (B, T)
        pooled = torch.bmm(weights.unsqueeze(1), x).squeeze(1)  # (B, D)
        return pooled, weights

class SentenceClassifier(nn.Module):
    def __init__(self, dim: int = 768, attn_hidden: int = 256, mlp_hidden: int = 512, dropout: float = 0.2):
        super().__init__()
        self.attn = TokenAttention(dim, attn_hidden=attn_hidden, dropout=dropout)
        self.mlp = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Dropout(dropout),
            nn.Linear(dim, mlp_hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_hidden, 128),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(128, 1)
        )

    def forward(self, x: torch.Tensor):
        mask = make_token_mask(x)
        pooled, _ = self.attn(x, mask)
        logits = self.mlp(pooled).squeeze(-1)
        return logits


In [6]:

def get_loaders(X, y, idx_tr, idx_va, batch_size=BATCH_SIZE):
    ds_tr = SentenceDataset(X[idx_tr], y[idx_tr])
    ds_va = SentenceDataset(X[idx_va], y[idx_va])
    return (
        DataLoader(ds_tr, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True),
        DataLoader(ds_va, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True),
    )

def train_one_fold(model, train_loader, valid_loader, device, epochs=EPOCHS, lr=LR,
                   weight_decay=WEIGHT_DECAY, pos_weight=1.0, fold=0):
    scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight], device=device))

    best_auc = -1.0
    best_state = None
    oof_logits = []

    for epoch in range(1, epochs+1):
        # train
        model.train()
        train_loss = 0.0
        for x, y in tqdm(train_loader, leave=False, desc=f"[Fold {fold}] Train epoch {epoch}"):
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                logits = model(x)
                loss = criterion(logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item() * x.size(0)
        scheduler.step()
        train_loss /= len(train_loader.dataset)

        # valid
        model.eval()
        val_logits, val_targets = [], []
        with torch.no_grad():
            for x, y in DataLoader(valid_loader.dataset, batch_size=BATCH_SIZE, shuffle=False):
                x = x.to(device)
                y = y.to(device)
                logits = model(x)
                val_logits.append(logits.detach().cpu().numpy())
                val_targets.append(y.detach().cpu().numpy())
        val_logits = np.concatenate(val_logits)
        val_targets = np.concatenate(val_targets)
        val_probs = 1 / (1 + np.exp(-val_logits))
        try:
            val_auc = roc_auc_score(val_targets, val_probs)
        except Exception:
            val_auc = float("nan")
        oof_logits.append(val_logits)

        if not math.isnan(val_auc) and val_auc > best_auc:
            best_auc = val_auc
            best_state = {k: v.cpu() for k, v in model.state_dict().items()}
        print(f"[Fold {fold}] epoch {epoch:02d}  train_loss={train_loss:.4f}  val_auc={val_auc:.5f}")

    if best_state is not None:
        model.load_state_dict(best_state)
    return model, np.concatenate(oof_logits), best_auc


In [7]:

def logits_to_prob(logits: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-logits))

def agg_prob_mean(probs): return float(np.mean(probs))
def agg_prob_max(probs):  return float(np.max(probs))
def agg_logit_mean(logits): return float(logits_to_prob(np.mean(logits)))

def choose_best_aggregator(valid_ids, valid_feats, valid_labels, models, device):
    aggregators = {
        "logit_mean": lambda probs, logits: agg_logit_mean(logits),
        "prob_mean":  lambda probs, logits: agg_prob_mean(probs),
        "prob_max":   lambda probs, logits: agg_prob_max(probs),
    }
    aucs = {}
    for name, agg in aggregators.items():
        y_true, y_score = [], []
        for pid, sents, label in zip(valid_ids, valid_feats, valid_labels):
            sent_logits = []
            with torch.no_grad():
                for s in sents:
                    x = torch.tensor(s, dtype=torch.float32, device=device).unsqueeze(0)
                    logits_per_model = []
                    for m in models:
                        m.eval()
                        l = m(x).item()
                        logits_per_model.append(l)
                    sent_logits.append(np.mean(logits_per_model))
            sent_probs = [logits_to_prob(l) for l in sent_logits]
            y_true.append(label)
            y_score.append(agg(sent_probs, sent_logits))
        try:
            auc = roc_auc_score(y_true, y_score)
        except Exception:
            auc = float("nan")
        aucs[name] = auc
    best_name = max(aucs, key=lambda k: (aucs[k] if not math.isnan(aucs[k]) else -1e9))
    print("Aggregator AUCs:", aucs)
    return best_name, aucs[best_name]

@torch.no_grad()
def predict_paragraph_scores(ids, feats, models, device, aggregator="logit_mean"):
    if aggregator == "prob_mean":
        agg_fn = lambda probs, logits: agg_prob_mean(probs)
    elif aggregator == "prob_max":
        agg_fn = lambda probs, logits: agg_prob_max(probs)
    else:
        agg_fn = lambda probs, logits: agg_logit_mean(logits)

    out_ids, out_probs = [], []
    for pid, sents in zip(ids, feats):
        sent_logits = []
        for s in sents:
            x = torch.tensor(s, dtype=torch.float32, device=device).unsqueeze(0)
            logits_per_model = []
            for m in models:
                m.eval()
                l = m(x).item()
                logits_per_model.append(l)
            sent_logits.append(float(np.mean(logits_per_model)))
        sent_probs = [logits_to_prob(l) for l in sent_logits]
        p = agg_fn(sent_probs, sent_logits)
        out_ids.append(pid)
        out_probs.append(p)
    return pd.DataFrame({"id": out_ids, "y_prob": out_probs})


In [8]:

seed_everything(SEED)
device = infer_device()
print("Using device:", device)

# Load sentence-level arrays
X, y = load_training_arrays()
print(f"Train arrays: {X.shape}, labels: pos={int(y.sum())}, neg={int(len(y)-y.sum())}")

pos, neg = y.sum(), len(y)-y.sum()
pos_weight = max(neg / max(pos, 1.0), 1.0)
print(f"pos_weight={pos_weight:.3f}")

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
models, fold_aucs = [], []
oof_sentence_logits = np.zeros_like(y, dtype=np.float32)

for fold, (idx_tr, idx_va) in enumerate(skf.split(X, y)):
    model = SentenceClassifier(dim=DIM, attn_hidden=256, mlp_hidden=512, dropout=0.2).to(device)
    tr_loader, va_loader = get_loaders(X, y, idx_tr, idx_va, BATCH_SIZE)
    model, val_logits, val_auc = train_one_fold(model, tr_loader, va_loader, device,
                                                epochs=EPOCHS, lr=LR, pos_weight=pos_weight, fold=fold)
    models.append(model)
    oof_sentence_logits[idx_va] = val_logits[:len(idx_va)]
    fold_aucs.append(val_auc)
    torch.save(model.state_dict(), OUT_MODELS_DIR / f"fold_{fold}.pt")
    print(f"[Fold {fold}] best val AUC: {val_auc:.5f}")

print("Mean val AUC across folds:", np.nanmean(fold_aucs))
np.save("oof_sentence_preds.npy", oof_sentence_logits)


Using device: cpu
Train arrays: (16322, 100, 768), labels: pos=8161, neg=8161
pos_weight=1.000


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 01  train_loss=0.4995  val_auc=0.91467


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 02  train_loss=0.3671  val_auc=0.94713


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 03  train_loss=0.3389  val_auc=0.94798


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 04  train_loss=0.3108  val_auc=0.95658


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 05  train_loss=0.3402  val_auc=0.95258


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 06  train_loss=0.2993  val_auc=0.95877


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 07  train_loss=0.2981  val_auc=0.95871


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 08  train_loss=0.2970  val_auc=0.95824


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 09  train_loss=0.2920  val_auc=0.95806


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 10  train_loss=0.2884  val_auc=0.95911


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 11  train_loss=0.2794  val_auc=0.95860


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 0] epoch 12  train_loss=0.2789  val_auc=0.95882
[Fold 0] best val AUC: 0.95911


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 01  train_loss=0.5108  val_auc=0.90117


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 02  train_loss=0.3835  val_auc=0.92925


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 03  train_loss=0.3425  val_auc=0.94436


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 04  train_loss=0.3188  val_auc=0.94313


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 05  train_loss=0.3178  val_auc=0.94946


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 06  train_loss=0.2973  val_auc=0.95256


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 07  train_loss=0.3020  val_auc=0.95390


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 08  train_loss=0.2884  val_auc=0.95482


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 09  train_loss=0.2852  val_auc=0.95493


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 10  train_loss=0.2797  val_auc=0.95511


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 11  train_loss=0.2799  val_auc=0.95555


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 1] epoch 12  train_loss=0.2755  val_auc=0.95569
[Fold 1] best val AUC: 0.95569


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 01  train_loss=0.5014  val_auc=0.89947


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 02  train_loss=0.3650  val_auc=0.93886


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 03  train_loss=0.3430  val_auc=0.94411


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 04  train_loss=0.3234  val_auc=0.94759


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 05  train_loss=0.3083  val_auc=0.94964


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 06  train_loss=0.2994  val_auc=0.95014


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 07  train_loss=0.2941  val_auc=0.95086


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 08  train_loss=0.2885  val_auc=0.95299


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 09  train_loss=0.2898  val_auc=0.95299


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 10  train_loss=0.2781  val_auc=0.95351


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 11  train_loss=0.2757  val_auc=0.95364


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 2] epoch 12  train_loss=0.2798  val_auc=0.95369
[Fold 2] best val AUC: 0.95369


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 01  train_loss=0.5028  val_auc=0.90848


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 02  train_loss=0.3804  val_auc=0.93462


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 03  train_loss=0.3330  val_auc=0.94549


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 04  train_loss=0.3269  val_auc=0.94708


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 05  train_loss=0.3089  val_auc=0.95193


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 06  train_loss=0.2942  val_auc=0.95413


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 07  train_loss=0.2888  val_auc=0.95491


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 08  train_loss=0.2835  val_auc=0.95515


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 09  train_loss=0.2774  val_auc=0.95557


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 10  train_loss=0.2782  val_auc=0.95602


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 11  train_loss=0.2773  val_auc=0.95631


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 3] epoch 12  train_loss=0.2738  val_auc=0.95633
[Fold 3] best val AUC: 0.95633


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))
  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 01  train_loss=0.4988  val_auc=0.92000


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 02  train_loss=0.3769  val_auc=0.93977


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 03  train_loss=0.3436  val_auc=0.95039


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 04  train_loss=0.3203  val_auc=0.95376


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 05  train_loss=0.3125  val_auc=0.95164


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 06  train_loss=0.2999  val_auc=0.95765


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 07  train_loss=0.2959  val_auc=0.95852


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 08  train_loss=0.2826  val_auc=0.95937


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 09  train_loss=0.2841  val_auc=0.95918


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 10  train_loss=0.2770  val_auc=0.95971


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 11  train_loss=0.2738  val_auc=0.95987


  with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                                                                                

[Fold 4] epoch 12  train_loss=0.2759  val_auc=0.96003
[Fold 4] best val AUC: 0.96003
Mean val AUC across folds: 0.9569711046188811


In [9]:

agg_name, agg_auc = "logit_mean", float("nan")
if exists(PATH_VAL_JSONL):
    val_ids, val_feats, val_labels = load_jsonl_validation(PATH_VAL_JSONL)
    agg_name, agg_auc = choose_best_aggregator(val_ids, val_feats, val_labels, models, device)
    df_val = predict_paragraph_scores(val_ids, val_feats, models, device, aggregator=agg_name)
    df_val["label"] = val_labels
    try:
        val_auc_final = roc_auc_score(val_labels, df_val["y_prob"])
        val_acc = accuracy_score(val_labels, (df_val["y_prob"] >= 0.5).astype(int))
        print(f"Validation with aggregator='{agg_name}': AUC={val_auc_final:.5f}  ACC@0.5={val_acc:.4f}")
    except Exception as e:
        print("Validation metric error:", e)
    df_val.to_csv("validation_preds.csv", index=False)
else:
    print("validation.jsonl not found; defaulting to aggregator='logit_mean'")
print("Selected aggregator:", agg_name, "AUC:", agg_auc)


Aggregator AUCs: {'logit_mean': 0.95, 'prob_mean': 0.9400000000000001, 'prob_max': 0.97}
Validation with aggregator='prob_max': AUC=0.97000  ACC@0.5=0.5000
Selected aggregator: prob_max AUC: 0.97


In [10]:

if exists(PATH_TEST_JSONL):
    test_ids, test_feats = load_jsonl_test(PATH_TEST_JSONL)
    df_sub = predict_paragraph_scores(test_ids, test_feats, models, device, aggregator=agg_name)
    df_sub = df_sub[["id", "y_prob"]]
    df_sub.to_csv("submission.csv", index=False)
    display(df_sub.head())
    print("Saved Kaggle submission to submission.csv")
else:
    print("test_features.jsonl not found; skipping submission generation.")


Unnamed: 0,id,y_prob
0,15,0.640365
1,16,0.589388
2,17,0.442631
3,18,0.816809
4,19,0.958217


Saved Kaggle submission to submission.csv
