In [1]:
# -*- coding: utf-8 -*-

import kagglehub
nbme_score_clinical_patient_notes_path = kagglehub.competition_download('nbme-score-clinical-patient-notes')

print('Data source import complete.')

Data source import complete.


In [2]:
import os, ast, random, json, gc
from pathlib import Path
import glob
import re
from typing import List, Tuple

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import GroupKFold

import torch, torch.nn as nn
import io, logging

from torch.utils.data import Dataset, DataLoader

from transformers import (
    AutoTokenizer, AutoModel,
    get_cosine_schedule_with_warmup,
)

In [3]:
# -------------- 1. 全域設定 -------------------------------
class CFG:
    data_dir      = Path("/kaggle/input/nbme-score-clinical-patient-notes")
    competition   = "nbme-score-clinical-patient-notes"
    model_name    = "microsoft/deberta-base"
    max_len       = 512
    batch_size    = 8
    gradient_accum = 2
    epochs        = 4
    lr            = 1e-5  # 🔥 降低學習率
    weight_decay  = 0.01
    scheduler     = "cosine"
    warmup_ratio  = 0.1
    n_folds       = 5
    seed          = 42
    output_dir    = "./nbme_ckpt"
    device        = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Memory optimization settings
    mixed_precision = False
    gradient_checkpointing = True
    
    # Debug settings
    debug_mode = False  # 🔥 關閉調試模式
    run_single_fold = False  # 🔥 運行完整的5-fold交叉驗證

# 設置tokenizer並行
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [4]:
# -------------- Utils: logger & seed --------------------
OUTPUT_DIR = CFG.output_dir

def get_logger(filename=OUTPUT_DIR + '/train'):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    log_path = Path(filename).parent
    log_path.mkdir(parents=True, exist_ok=True)

    logger = getLogger(__name__)
    if logger.handlers:
        return logger
    logger.setLevel(INFO)

    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))

    handler2 = FileHandler(f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))

    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed: int = 42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

def cleanup_memory():
    """Force garbage collection and clear CUDA cache"""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

# Configure mixed precision
if CFG.mixed_precision:
    from torch.cuda.amp import autocast, GradScaler
    scaler = GradScaler()
else:
    from contextlib import nullcontext
    autocast = nullcontext

# Save config
(Path(CFG.output_dir).mkdir(exist_ok=True, parents=True))
cfg_dict = {
    k: (str(v) if isinstance(v, Path) else v)
    for k, v in CFG.__dict__.items()
    if not k.startswith("__") and not callable(v)
}
json.dump(cfg_dict, open(Path(CFG.output_dir)/"cfg.json", "w"), indent=2)

seed_everything(CFG.seed)

In [5]:
# -------------- 2. 資料讀取與預處理 -----------------------
BASE = CFG.data_dir

def _find_csv(keyword: str) -> Path:
    patt = str(BASE / "*.csv")
    files = [Path(f) for f in glob.glob(patt) if keyword.lower() in Path(f).stem.lower()]
    if not files:
        raise FileNotFoundError(f"[Error] CSV containing '{keyword}' not found under {BASE.resolve()}")
    return files[0]

train  = pd.read_csv(_find_csv("train"))
feats  = pd.read_csv(_find_csv("features"))
pnotes = pd.read_csv(_find_csv("patient_notes"))

def _parse_list(x: str):
    try:
        return ast.literal_eval(x) if isinstance(x, str) and x else []
    except Exception:
        return []

def _find_all_spans(note: str, phrase: str) -> List[Tuple[int, int]]:
    spans = []
    for m in re.finditer(re.escape(phrase), note, flags=re.I):
        spans.append((m.start(), m.end()))
    return spans

def apply_annotation_fixes(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    
    MANUAL_FIX: dict[int, tuple[str, str]] = {
        338:  ("father heart attack", "764 783"),
        621:  ("for the last 2-3 months", "77 100"),
        1262: ("mother thyroid problem", "551 572"),
    }
    for idx, (ann, loc) in MANUAL_FIX.items():
        if idx in df.index:
            df.at[idx, "annotation"] = f'["{ann}"]'
            df.at[idx, "location"]   = f'["{loc}"]'

    auto_fixed = 0
    for i, row in df.iterrows():
        anns = _parse_list(row["annotation"])
        locs = _parse_list(row["location"])
        if len(anns) == len(locs) and len(locs) > 0:
            continue
        note = row["pn_history"]
        new_locs = []
        used = set()
        for ann in anns:
            spans = _find_all_spans(note, ann)
            chosen = None
            for s, e in spans:
                if (s, e) not in used:
                    chosen = (s, e); break
            if chosen is None:
                break
            used.add(chosen)
            new_locs.append(f"{chosen[0]} {chosen[1]}")
        if len(new_locs) == len(anns):
            df.at[i, "location"] = str([*new_locs])
            auto_fixed += 1
    LOGGER.info(f"[annotation fix] manual={len(MANUAL_FIX)}, auto={auto_fixed}")
    return df

# Merge data
train = (train.merge(feats, on=["feature_num","case_num"], how="left")
               .merge(pnotes, on=["pn_num","case_num"],    how="left"))

train = apply_annotation_fixes(train)

def str2list(x):
    return ast.literal_eval(x) if isinstance(x,str) and x!="" else []

train["annotation_list"] = train["annotation"].apply(str2list)
train["location_list"]   = train["location"].apply(str2list)

[annotation fix] manual=3, auto=4399


In [6]:
# 數據質量檢查
def debug_data_quality(train_df):
    if not CFG.debug_mode:
        return
        
    LOGGER.info("=== 數據質量檢查 ===")
    
    empty_annotations = 0
    empty_locations = 0
    mismatch_count = 0
    valid_samples = 0
    
    for idx, row in train_df.iterrows():
        anns = row["annotation_list"]
        locs = row["location_list"]
        
        if len(anns) == 0:
            empty_annotations += 1
        if len(locs) == 0:
            empty_locations += 1
        if len(anns) != len(locs):
            mismatch_count += 1
        if len(anns) > 0 and len(locs) > 0 and len(anns) == len(locs):
            valid_samples += 1
    
    LOGGER.info(f"Empty annotations: {empty_annotations}")
    LOGGER.info(f"Empty locations: {empty_locations}")
    LOGGER.info(f"Annotation-location mismatch: {mismatch_count}")
    LOGGER.info(f"Valid samples: {valid_samples}")
    LOGGER.info(f"Total rows: {len(train_df)}")

debug_data_quality(train)

tok = AutoTokenizer.from_pretrained(CFG.model_name, use_fast=True)

def create_char_targets(text: str, spans: List[str]) -> np.ndarray:
    """創建字符級別的目標標籤"""
    targets = np.zeros(len(text), dtype=np.int8)
    for span in spans:
        if not span:
            continue
        # 🔥 修復：正確處理分號分隔的多個位置
        for loc in span.split(";"):
            loc = loc.strip()
            if not loc:
                continue
            try:
                start, end = map(int, loc.split())
                if start >= len(text):
                    continue
                end = min(end, len(text))
                if start < end:  # 確保有效範圍
                    targets[start:end] = 1
            except (ValueError, IndexError):
                continue
    return targets

def encode_example(note: str, feature: str, targets: np.ndarray | None):
    """編碼樣本並創建token級別標籤"""
    enc = tok(
        feature,
        note,
        truncation="only_second",
        padding="max_length",
        max_length=CFG.max_len,
        return_offsets_mapping=True
    )

    if targets is not None:
        labels = np.zeros(len(enc["input_ids"]), dtype=np.float32)
        seq_ids = enc.sequence_ids()
        
        for idx, (s, e) in enumerate(enc["offset_mapping"]):
            # 只處理文本部分的tokens
            if seq_ids[idx] != 1 or s == e:
                continue
            
            # 🔥 修復：改進token-character對應邏輯
            if s < len(targets) and e <= len(targets):
                # 如果token覆蓋的任何字符被標記為正例，則token為正例
                if targets[s:e].sum() > 0:
                    labels[idx] = 1.0
        
        enc["labels"] = labels

    enc.pop("offset_mapping")

    tensor_dict = {}
    for k, v in enc.items():
        if k == "labels":
            tensor_dict[k] = torch.tensor(v, dtype=torch.float)
        else:
            tensor_dict[k] = torch.tensor(v, dtype=torch.long)
    return tensor_dict

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [7]:
# 標籤建立檢查
def debug_label_creation(train_df, sample_size=3):
    if not CFG.debug_mode:
        return
        
    LOGGER.info("\n=== 標籤建立檢查 ===")
    
    valid_samples = []
    for idx, row in train_df.iterrows():
        if len(row["location_list"]) > 0:
            valid_samples.append((idx, row))
        if len(valid_samples) >= sample_size:
            break
    
    for i, (idx, row) in enumerate(valid_samples):
        char_targets = create_char_targets(row.pn_history, row.location_list)
        encoded = encode_example(row.pn_history, row.feature_text, char_targets)
        
        LOGGER.info(f"\n--- Sample {i} (Row {idx}) ---")
        LOGGER.info(f"Text length: {len(row.pn_history)}")
        LOGGER.info(f"Location list: {row.location_list}")
        LOGGER.info(f"Char targets sum: {char_targets.sum()}")
        LOGGER.info(f"Token labels sum: {encoded['labels'].sum().item()}")
        LOGGER.info(f"Total tokens: {len(encoded['labels'])}")
        
        # 顯示一些ground truth文本
        for loc in row.location_list:
            try:
                start, end = map(int, loc.split())
                gt_text = row.pn_history[start:end]
                LOGGER.info(f"GT span ({start},{end}): '{gt_text}'")
            except:
                pass

debug_label_creation(train)

In [8]:
# -------------- 4. 自訂 Dataset ---------------------------
class NBMEDataset(Dataset):
    def __init__(self, df:pd.DataFrame, is_train=True):
        self.df = df
        self.is_train = is_train
    
    def __len__(self): 
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        if self.is_train:
            char_targets = create_char_targets(row.pn_history, row.location_list)
        else:
            char_targets = None
        return encode_example(row.pn_history, row.feature_text, char_targets)

In [9]:
# -------------- 5. 評分工具 ------------------------
def span_to_char_set(spans: List[str]) -> set[int]:
    """將span列表轉換為字符索引集合"""
    char_set = set()
    for sp in spans:
        if not sp:
            continue
        for loc in sp.split(";"):
            loc = loc.strip()
            if not loc:
                continue
            try:
                a, b = map(int, loc.split())
                if a < b:
                    char_set.update(range(a, b))
            except (ValueError, IndexError):
                continue
    return char_set

def compute_micro_f1(pred_df: pd.DataFrame) -> float:
    """計算micro F1分數"""
    tp = fp = fn = 0
    for g, p in zip(pred_df.ground, pred_df.pred):
        gset = span_to_char_set([g] if isinstance(g, str) else g)
        pset = span_to_char_set([p] if isinstance(p, str) else p)
        tp += len(gset & pset)
        fp += len(pset - gset)
        fn += len(gset - pset)
    return 2 * tp / (2 * tp + fp + fn + 1e-8)

# 🔥 修復：改進span重建邏輯
def reconstruct_spans_from_char_probs(char_prob: np.ndarray, threshold: float = 0.5, min_span_len: int = 1) -> List[str]:
    """從字符概率重建spans"""
    spans = []
    start = None
    
    for idx, prob in enumerate(char_prob):
        if prob >= threshold and start is None:
            start = idx
        elif (prob < threshold or idx == len(char_prob) - 1) and start is not None:
            end = idx if prob < threshold else idx + 1
            # 只保留足夠長的spans
            if end - start >= min_span_len:
                spans.append(f"{start} {end}")
            start = None
    
    return spans

def find_optimal_threshold(pred_logits, val_idx, train_df):
    """尋找最佳閾值"""
    LOGGER.info("\n=== 尋找最佳閾值 ===")
    
    thresholds = np.arange(0.01, 0.99, 0.02)  # 更細粒度的閾值搜索
    best_f1 = 0
    best_th = 0.5
    
    for th in thresholds:
        fold_preds = []
        fold_gts = []
        
        for i, row in enumerate(train_df.iloc[val_idx].itertuples()):
            prob = pred_logits[i]
            
            # 編碼以獲取offset mapping
            enc = tok(row.feature_text, row.pn_history,
                      truncation="only_second", max_length=CFG.max_len,
                      return_offsets_mapping=True)
            
            # 重建字符級概率
            char_prob = np.zeros(len(row.pn_history))
            seq_ids = enc.sequence_ids()
            
            for t, (s, e) in enumerate(enc["offset_mapping"]):
                if seq_ids[t] == 1 and s < e and s < len(char_prob):
                    end_idx = min(e, len(char_prob))
                    char_prob[s:end_idx] = np.maximum(char_prob[s:end_idx], prob[t])
            
            # 重建spans
            pred_spans = reconstruct_spans_from_char_probs(char_prob, th)
            pred_span = ";".join(pred_spans)
            
            fold_preds.append(pred_span)
            fold_gts.append(";".join(row.location_list))
        
        f1 = compute_micro_f1(pd.DataFrame({"ground": fold_gts, "pred": fold_preds}))
        
        if f1 > best_f1:
            best_f1 = f1
            best_th = th
            if CFG.debug_mode and f1 > 0:
                LOGGER.info(f"Threshold {th:.3f}: F1={f1:.4f} ⭐")
    
    LOGGER.info(f"Best threshold: {best_th:.3f} (F1={best_f1:.4f})")
    return best_th

In [10]:
# -------------- 6. 模型 -------------------------------
class DebertaForTokenBinary(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(CFG.model_name)
        
        if CFG.gradient_checkpointing:
            self.backbone.gradient_checkpointing_enable()
            
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(self.backbone.config.hidden_size, 1)
        
    def forward(self, **batch):
        labels = batch.pop("labels", None)
        out = self.backbone(**batch)
        logits = self.classifier(self.dropout(out.last_hidden_state)).squeeze(-1)
        
        loss = None
        if labels is not None:
            # 🔥 修復：使用focal loss來處理類別不平衡
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits, labels)
        
        return {"loss": loss, "logits": logits}

In [11]:
# -------------- 7. 交叉驗證訓練 ------------------------
oof_preds, oof_gts = [], []
gkf = GroupKFold(n_splits=CFG.n_folds)

fold_splits = list(gkf.split(train, groups=train.pn_num))
if CFG.run_single_fold:
    fold_splits = fold_splits[:1]

for fold, (trn_idx, val_idx) in enumerate(fold_splits):
    LOGGER.info(f"\n========== FOLD {fold} ==========")
    
    cleanup_memory()
    
    trn_ds = NBMEDataset(train.iloc[trn_idx])
    val_ds = NBMEDataset(train.iloc[val_idx])
    
    pin_memory = CFG.device == "cuda"
    trn_loader = DataLoader(trn_ds, batch_size=CFG.batch_size,
                            shuffle=True, num_workers=0, pin_memory=pin_memory)
    val_loader = DataLoader(val_ds, batch_size=CFG.batch_size,
                            shuffle=False, num_workers=0, pin_memory=pin_memory)

    model = DebertaForTokenBinary().to(CFG.device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=CFG.lr,
                                  weight_decay=CFG.weight_decay)
    num_training_steps = CFG.epochs * len(trn_loader) // CFG.gradient_accum
    num_warmup = int(CFG.warmup_ratio * num_training_steps)
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup, num_training_steps)

    best_f1 = -1
    best_path = Path(CFG.output_dir) / f"fold{fold}.pt"
    
    for epoch in range(CFG.epochs):
        # Training
        model.train()
        running_loss = 0
        pbar = tqdm(trn_loader, desc=f"Train E{epoch}", dynamic_ncols=True, leave=False)
        
        for step, batch in enumerate(pbar):
            batch = {k: v.to(CFG.device) for k, v in batch.items()}
            
            if CFG.mixed_precision:
                with autocast():
                    out = model(**batch)
                    loss = out["loss"] / CFG.gradient_accum
                scaler.scale(loss).backward()
            else:
                out = model(**batch)
                loss = out["loss"] / CFG.gradient_accum
                loss.backward()
            
            running_loss += loss.item()
            
            if (step + 1) % CFG.gradient_accum == 0 or step + 1 == len(trn_loader):
                if CFG.mixed_precision:
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
                
                pbar.set_postfix(loss=running_loss * CFG.gradient_accum / (step + 1))
        
        # Validation
        model.eval()
        preds = []
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Valid", dynamic_ncols=True, leave=False):
                batch = {k: v.to(CFG.device) for k, v in batch.items()}
                
                if CFG.mixed_precision:
                    with autocast():
                        logits = model(**batch)["logits"].sigmoid().cpu().numpy()
                else:
                    logits = model(**batch)["logits"].sigmoid().cpu().numpy()
                preds.append(logits)
        
        pred_logits = np.concatenate(preds, axis=0)
        
        # 尋找最佳閾值
        best_th = find_optimal_threshold(pred_logits, val_idx, train)
        
        # 使用最佳閾值計算F1
        fold_preds = []
        fold_gts = []
        
        for i, row in enumerate(train.iloc[val_idx].itertuples()):
            prob = pred_logits[i]
            
            enc = tok(row.feature_text, row.pn_history,
                      truncation="only_second", max_length=CFG.max_len,
                      return_offsets_mapping=True)
            
            char_prob = np.zeros(len(row.pn_history))
            seq_ids = enc.sequence_ids()
            
            for t, (s, e) in enumerate(enc["offset_mapping"]):
                if seq_ids[t] == 1 and s < e and s < len(char_prob):
                    end_idx = min(e, len(char_prob))
                    char_prob[s:end_idx] = np.maximum(char_prob[s:end_idx], prob[t])
            
            pred_spans = reconstruct_spans_from_char_probs(char_prob, best_th)
            pred_span = ";".join(pred_spans)
            
            fold_preds.append(pred_span)
            fold_gts.append(";".join(row.location_list))
        
        oof_preds.extend(fold_preds)
        oof_gts.extend(fold_gts)
        
        f1 = compute_micro_f1(pd.DataFrame({"ground": fold_gts, "pred": fold_preds}))
        LOGGER.info(f"Fold {fold} Epoch {epoch} F1={f1:.4f} (threshold={best_th:.3f})")
        
        if f1 > best_f1:
            best_f1 = f1
            torch.save({
                'model_state_dict': model.state_dict(),
                'best_threshold': float(best_th),  # 🔥 確保是Python float
                'best_f1': float(best_f1)  # 🔥 確保是Python float
            }, best_path)
    
    LOGGER.info(f"Fold {fold} best F1={best_f1:.4f}")
    
    del model, optimizer, scheduler
    cleanup_memory()


2025-05-29 13:37:43.025780: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748525863.212882      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748525863.266200      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


pytorch_model.bin:   0%|          | 0.00/559M [00:00<?, ?B/s]

Train E0:   0%|          | 0/1430 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/559M [00:00<?, ?B/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.490 (F1=0.7062)
Fold 0 Epoch 0 F1=0.7062 (threshold=0.490)


Train E1:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.430 (F1=0.7815)
Fold 0 Epoch 1 F1=0.7815 (threshold=0.430)


Train E2:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.430 (F1=0.7979)
Fold 0 Epoch 2 F1=0.7979 (threshold=0.430)


Train E3:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.590 (F1=0.8010)
Fold 0 Epoch 3 F1=0.8010 (threshold=0.590)
Fold 0 best F1=0.8010



Train E0:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.490 (F1=0.7396)
Fold 1 Epoch 0 F1=0.7396 (threshold=0.490)


Train E1:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.490 (F1=0.7957)
Fold 1 Epoch 1 F1=0.7957 (threshold=0.490)


Train E2:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.570 (F1=0.8189)
Fold 1 Epoch 2 F1=0.8189 (threshold=0.570)


Train E3:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.630 (F1=0.8206)
Fold 1 Epoch 3 F1=0.8206 (threshold=0.630)
Fold 1 best F1=0.8206



Train E0:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.550 (F1=0.7026)
Fold 2 Epoch 0 F1=0.7026 (threshold=0.550)


Train E1:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.430 (F1=0.7907)
Fold 2 Epoch 1 F1=0.7907 (threshold=0.430)


Train E2:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.570 (F1=0.8120)
Fold 2 Epoch 2 F1=0.8120 (threshold=0.570)


Train E3:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.590 (F1=0.8157)
Fold 2 Epoch 3 F1=0.8157 (threshold=0.590)
Fold 2 best F1=0.8157



Train E0:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.490 (F1=0.6969)
Fold 3 Epoch 0 F1=0.6969 (threshold=0.490)


Train E1:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.550 (F1=0.7827)
Fold 3 Epoch 1 F1=0.7827 (threshold=0.550)


Train E2:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.510 (F1=0.8035)
Fold 3 Epoch 2 F1=0.8035 (threshold=0.510)


Train E3:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.550 (F1=0.8048)
Fold 3 Epoch 3 F1=0.8048 (threshold=0.550)
Fold 3 best F1=0.8048



Train E0:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.410 (F1=0.7148)
Fold 4 Epoch 0 F1=0.7148 (threshold=0.410)


Train E1:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.590 (F1=0.7842)
Fold 4 Epoch 1 F1=0.7842 (threshold=0.590)


Train E2:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.570 (F1=0.8018)
Fold 4 Epoch 2 F1=0.8018 (threshold=0.570)


Train E3:   0%|          | 0/1430 [00:00<?, ?it/s]

Valid:   0%|          | 0/358 [00:00<?, ?it/s]


=== 尋找最佳閾值 ===
Best threshold: 0.570 (F1=0.8047)
Fold 4 Epoch 3 F1=0.8047 (threshold=0.570)
Fold 4 best F1=0.8047


In [12]:
# -------------- 8. 整體 OOF 分數 ------------------------
overall_f1 = compute_micro_f1(pd.DataFrame({"ground": oof_gts, "pred": oof_preds}))
LOGGER.info(f"\n========== CV micro-F1: {overall_f1:.4f} ==========")

cleanup_memory()

if CFG.debug_mode:
    LOGGER.info("\n=== 調試總結 ===")
    LOGGER.info(f"Training completed with CV F1: {overall_f1:.4f}")
    if overall_f1 > 0:
        LOGGER.info("✅ 訓練成功！")
    else:
        LOGGER.info("⚠️  仍需要進一步調試")


