In [103]:
import sys
import os
from pathlib import Path
try:
    import pyvips  # type: ignore
    _HAS_PYVIPS = True
except Exception:
    pyvips = None
    _HAS_PYVIPS = False

PROJECT_ROOT = Path("/home/users/ntu/lizh0106/scratch/nscc_work/Baseline_models")
os.chdir(PROJECT_ROOT)
#sys.path.insert(0, str(PROJECT_ROOT))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from src.config import load_paths, load_params
from src.data.features import prepare_features
from src.models.train import cross_validate, predict_external
from src.evaluation.grading import gleason_to_isup
from src.evaluation.metrics import evaluate_slide_predictions

# CONFIG

In [104]:
base_path =Path("results/hard_labels")

# ===================================
test_folder = "XGB_nol2_with2048" # 需要和base_path链接，里面有aggc_prob_p 和tcga_prob_p

df_aggc_index = "/home/users/ntu/lizh0106/scratch/nscc_work/Processed_Features/AGGC/20x_512/index.csv"
df_tcga_index = "/home/users/ntu/lizh0106/scratch/nscc_work/Processed_Features/TGGA_PRAD_V2/TGGA_PRAD_V2_without_anno/TCGA_20x512/index.csv"

aggc_prob_p = "aggc_oof_tile_proba.npy"
tcga_prob_p = "tcga_proba_mean.npy"


In [105]:
PATTERN_MAP = {0: 3, 1: 4, 2: 5}
# def gleason_to_isup(g1, g2): ...

def agg_from_tiles(
    df_index: pd.DataFrame,
    tile_predictions: np.ndarray,
    pattern_map=None,
    primary_thresh: float = 0.95,
    secondary_min: float = 0.05,
    agg_input: str = "proba",
    # --- 新增：高置信聚合控制 ---
    conf_mode: str = "none",        # "none" | "topk" | "threshold" | "weight"
    top_frac: float = 0.3,          # topk: 取 top 30%
    conf_threshold: float = 0.7,    # threshold: 置信度阈值
    min_tiles: int = 5,            # 筛选后少于这个数 -> fallback
    weight_power: float = 1.0,      # weight: w = conf^power
    conf_use_classes: int = 3,      # 只用前3类(tumor patterns)计算置信度
):
    """
    Aggregate tile-level class predictions to slide-level Gleason/ISUP.

    High-confidence aggregation (proba mode only):
      - conf_mode="topk": keep top_frac of tiles by confidence
      - conf_mode="threshold": keep tiles with confidence >= conf_threshold
      - conf_mode="weight": use all tiles, weight each tile by confidence^weight_power
      - conf_mode="none": no filtering/weighting (your original behavior)
    """
    if pattern_map is None:
        pattern_map = PATTERN_MAP

    if agg_input not in {"labels", "proba"}:
        raise ValueError("agg_input must be either 'labels' or 'proba'.")

    if conf_mode not in {"none", "topk", "threshold", "weight"}:
        raise ValueError("conf_mode must be one of: none, topk, threshold, weight")

    out_rows = []
    for _, row in df_index.iterrows():
        slide_id = row["slide_id"]
        start = int(row["start"])
        end = start + int(row["length"])

        tiles_preds = tile_predictions[start:end]

        if agg_input == "labels":
            # 原逻辑：硬标签计数
            counts = np.bincount(tiles_preds, minlength=4)
            tumor_scores = counts[:3].astype(float)

        else:
            # 原逻辑：proba 汇总（增强：可筛选/加权）
            if tiles_preds.ndim != 2 or tiles_preds.shape[1] < 3:
                raise ValueError(
                    "For agg_input='proba', tile_predictions must have shape (n_tiles, n_classes>=3)."
                )

            proba = tiles_preds[:, :3].astype(float)  # 只取 G3/G4/G5

            # 置信度：只在 tumor 三类内部取 max（避免 other 类干扰）
            # 如果你想用 full classes 做置信度，把 conf_use_classes 改成 proba.shape[1] 并传入全 proba
            conf = proba[:, :min(conf_use_classes, proba.shape[1])].max(axis=1)

            # --- 选择/加权 tile ---
            if conf_mode == "none":
                sel_proba = proba
                weights = None

            elif conf_mode == "topk":
                n = len(conf)
                k = max(1, int(np.ceil(n * top_frac)))
                idx = np.argpartition(conf, -k)[-k:]   # 比 argsort 更快
                sel_proba = proba[idx]
                weights = None

                # fallback：太少就用全体
                if sel_proba.shape[0] < min_tiles:
                    sel_proba = proba
                    weights = None

            elif conf_mode == "threshold":
                idx = np.where(conf >= conf_threshold)[0]
                sel_proba = proba[idx]
                weights = None

                if sel_proba.shape[0] < min_tiles:
                    sel_proba = proba
                    weights = None

            else:  # conf_mode == "weight"
                # 不删 tile，按置信度加权（最稳）
                weights = np.power(conf, weight_power)
                # 防止全 0
                if np.all(weights == 0):
                    weights = None
                sel_proba = proba

            # --- 聚合 ---
            if weights is None:
                tumor_scores = sel_proba.sum(axis=0)
            else:
                tumor_scores = (sel_proba * weights[:, None]).sum(axis=0)

        total_tumor = float(tumor_scores.sum())

        if total_tumor == 0:
            out_rows.append({
                "slide_id": slide_id,
                "p3": 0.0, "p4": 0.0, "p5": 0.0,
                "primary_pattern": None,
                "secondary_pattern": None,
                "gleason": None,
                "ISUP_grade_group": None,
            })
            continue

        p3, p4, p5 = (tumor_scores / total_tumor).tolist()
        fractions = np.array([p3, p4, p5], dtype=float)

        order = np.argsort(-fractions)
        p1_idx, p2_idx = int(order[0]), int(order[1])
        p1_frac, p2_frac = float(fractions[p1_idx]), float(fractions[p2_idx])

        if p1_frac >= primary_thresh or p2_frac < secondary_min:
            g1 = g2 = pattern_map[p1_idx]
        else:
            g1 = pattern_map[p1_idx]
            g2 = pattern_map[p2_idx]

        out_rows.append({
            "slide_id": slide_id,
            "p3": float(p3),
            "p4": float(p4),
            "p5": float(p5),
            "primary_pattern": g1,
            "secondary_pattern": g2,
            "gleason": f"{g1}+{g2}",
            "ISUP_grade_group": gleason_to_isup(g1, g2),
        })

    return pd.DataFrame(out_rows)


In [106]:
df_aggc.head()

Unnamed: 0,slide_id,y,start,length,n_tiles_read,h5_path
0,Subset1_Train_1,1,0,10339,10339,/home/users/ntu/lizh0106/scratch/nscc_work/AGG...
1,Subset1_Train_10,3,10339,14579,14579,/home/users/ntu/lizh0106/scratch/nscc_work/AGG...
2,Subset1_Train_100,3,24918,7562,7562,/home/users/ntu/lizh0106/scratch/nscc_work/AGG...
3,Subset1_Train_101,1,32480,5704,5704,/home/users/ntu/lizh0106/scratch/nscc_work/AGG...
4,Subset1_Train_102,2,38184,10464,10464,/home/users/ntu/lizh0106/scratch/nscc_work/AGG...


In [107]:
df_aggc_index_p = df_aggc_index
df_tcga_index_p = df_tcga_index

aggc_prob_p = base_path / test_folder / aggc_prob_p
tcga_prob_p = base_path / test_folder / tcga_prob_p

df_aggc = pd.read_csv(df_aggc_index_p)
df_tcga = pd.read_csv(df_tcga_index_p)

aggc_tile_proba = np.load(aggc_prob_p)   # shape: (n_tiles, n_classes>=3)
tcga_tile_proba = np.load(tcga_prob_p)

# 先试最稳的：weight（不删tile）
pred_df_aggc = agg_from_tiles(
    df_aggc, aggc_tile_proba,
    agg_input="proba",
    conf_mode="threshold",
    weight_power=5.0,    # 你可以试 1.0 / 2.0 / 3.0
    conf_threshold = 0.5,
    min_tiles=5
)

pred_df_tcga = agg_from_tiles(
    df_tcga, tcga_tile_proba,
    agg_input="proba",
    conf_mode="threshold",
    weight_power=5.0,
    conf_threshold = 0.5,
    min_tiles=5
)

##############################

evaluate_slide_predictions(pred_df_aggc,df_aggc,
    pred_col="ISUP_grade_group",true_col="y",treat_nan_as_class=5,verbose=True,)

print("====================================TCGA")

evaluate_slide_predictions(pred_df_tcga,df_tcga,
    pred_col="ISUP_grade_group",true_col="y",treat_nan_as_class=5,verbose=True,)


=== Slide-level Evaluation ===
Slides with NaN predictions: 0 / 187

--- Including NaNs (as class 5) ---
Accuracy: 0.6256684491978609
--- Excluding NaNs ---
Accuracy: 0.6256684491978609
Balanced accuracy: 0.3787878787878788
Confusion matrix:
 [[ 0  8  3  0  0  0]
 [ 0 65 12  0  1  0]
 [ 0 20 43  1  2  0]
 [ 0  1  3  0  6  0]
 [ 0  0 13  0  9  0]
 [ 0  0  0  0  0  0]] 

Classification report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.69      0.83      0.76        78
           2       0.58      0.65      0.61        66
           3       0.00      0.00      0.00        10
           4       0.50      0.41      0.45        22

    accuracy                           0.63       187
   macro avg       0.35      0.38      0.36       187
weighted avg       0.55      0.63      0.59       187

=== Slide-level Evaluation ===
Slides with NaN predictions: 0 / 316

--- Including NaNs (as class 5) ---
Accuracy: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


{'nan_count': 0,
 'acc_all': 0.4810126582278481,
 'confusion_matrix_all': array([[ 0, 20,  5,  0,  0,  0],
        [ 0, 62, 33,  1,  2,  0],
        [ 0, 10, 61,  1,  1,  0],
        [ 0,  1, 30,  2,  1,  0],
        [ 0,  1, 56,  2, 27,  0],
        [ 0,  0,  0,  0,  0,  0]]),
 'acc_valid': 0.4810126582278481,
 'balanced_acc_valid': 0.3682093034729024,
 'classification_report': '              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00        25\n           1       0.66      0.63      0.65        98\n           2       0.33      0.84      0.47        73\n           3       0.33      0.06      0.10        34\n           4       0.87      0.31      0.46        86\n\n    accuracy                           0.48       316\n   macro avg       0.44      0.37      0.34       316\nweighted avg       0.55      0.48      0.45       316\n'}