### 3D island postprocessing of StratX lobe segmentation predictions

In [None]:
import os
import nibabel as nib
import numpy as np
from pathlib import Path
from scipy.ndimage import label, binary_dilation

# === Directories ===
# Folder containing the lobe prediction masks (*.nii.gz) to clean up.
input_dir = r"------ INSERT PATH HERE ------"
# Cleaned masks go into a sibling folder created next to the input folder.
_input_parent = os.path.dirname(input_dir)
output_dir = os.path.join(_input_parent, "pred_folder_lobes134_best25003dEdited_2500")
os.makedirs(output_dir, exist_ok=True)

# === Function to assign an isolated component to the neighboring label with largest contact ===
def reassign_to_largest_border_component(island_mask, full_mask):
    """Choose the label an isolated component should be merged into.

    The island is grown by one binary-dilation step; the labelled (non-zero)
    voxels of ``full_mask`` that lie in the grown region but outside the
    island itself are tallied per label.

    Args:
        island_mask: Boolean array marking the voxels of the island.
        full_mask: Label array of the same shape as ``island_mask``.

    Returns:
        The label with the most voxels in that one-step shell (the smallest
        label wins ties), or 0 when no labelled voxel touches the island.
    """
    grown = binary_dilation(island_mask, iterations=1)
    shell = grown & ~island_mask & (full_mask > 0)
    touching = full_mask[shell]
    if touching.size == 0:
        return 0  # only background around the island
    labels, tallies = np.unique(touching, return_counts=True)
    return labels[tallies.argmax()]

# === Main postprocessing function ===
def remove_isolated_components(filepath, output_dir):
    """Keep only the largest 3D connected component of each label 1-5.

    For every label, all connected components except the largest one
    ("islands") are relabelled to the neighbouring label with the largest
    contact, or to background (0) when no other label touches them.
    Neighbours are looked up in the original volume, not in the partially
    cleaned one. The cleaned mask is saved under the same file name inside
    ``output_dir`` with the affine and header of the input image.

    Args:
        filepath: Path of the NIfTI label map (``str`` or ``pathlib.Path``).
        output_dir: Directory the cleaned mask is written to.

    Returns:
        Tuple ``(label_changes, to_background_volumes, total_voxels_changed)``:
        ``label_changes`` maps ``(file name, old label, new label)`` to the
        number of relabelled voxels, ``to_background_volumes`` maps the file
        name to the number of voxels set to background, and
        ``total_voxels_changed`` is the voxel count of all islands.
    """
    # The bookkeeping below needs ``.name``; accept plain strings as well.
    filepath = Path(filepath)
    img = nib.load(filepath)
    data = img.get_fdata().astype(np.uint8)
    new_data = np.zeros_like(data)

    label_changes = {}
    to_background_volumes = {}
    total_voxels_changed = 0

    for label_id in range(1, 6):  # Labels 1 to 5
        binary = (data == label_id)
        if not np.any(binary):
            continue
        labeled_cc, num = label(binary)
        sizes = np.bincount(labeled_cc.ravel())
        sizes[0] = 0  # index 0 is "not this label"; never pick it as largest
        largest_cc = np.argmax(sizes)
        new_data[labeled_cc == largest_cc] = label_id

        # With a single component the loop body is skipped entirely.
        for i in range(1, num + 1):
            if i == largest_cc:
                continue
            island_mask = (labeled_cc == i)
            new_label = reassign_to_largest_border_component(island_mask, data)
            vol = np.sum(island_mask)
            total_voxels_changed += vol
            if new_label == 0:
                to_background_volumes[filepath.name] = to_background_volumes.get(filepath.name, 0) + vol
            else:
                key = (filepath.name, label_id, new_label)
                label_changes[key] = label_changes.get(key, 0) + vol
            new_data[island_mask] = new_label

    out_path = os.path.join(output_dir, filepath.name)
    nib.save(nib.Nifti1Image(new_data, img.affine, img.header), out_path)
    return label_changes, to_background_volumes, total_voxels_changed

# === Process all files ===
all_files = list(Path(input_dir).glob("*.nii.gz"))
total_label_changes = {}  # (file name, old label, new label) -> voxels
total_bg_changes = {}     # file name -> voxels moved to background

for nii_path in all_files:
    print(f"🔄 Processing {nii_path.name} ...")
    file_label_changes, file_bg_changes, n_reassigned = remove_isolated_components(nii_path, output_dir)
    # Fold the per-file tallies into the running totals.
    for running, update in (
        (total_label_changes, file_label_changes),
        (total_bg_changes, file_bg_changes),
    ):
        for key, voxels in update.items():
            running[key] = running.get(key, 0) + voxels
    print(f"✅ Done {nii_path.name} — total voxels reassigned: {n_reassigned}")

# === Summary ===
print("\n🔝 Top 5 conversions from one label to another:")
ranked_label_changes = sorted(total_label_changes.items(), key=lambda item: item[1], reverse=True)
for (name, from_label, to_label), vol in ranked_label_changes[:5]:
    print(f"{name}: {vol} voxels from label {from_label} → {to_label}")

print("\n🔝 Top 5 conversions from label to background:")
ranked_bg_changes = sorted(total_bg_changes.items(), key=lambda item: item[1], reverse=True)
for name, vol in ranked_bg_changes[:5]:
    print(f"{name}: {vol} voxels converted to background")


## Calculating the fissure completeness of the StratX dataset

In [None]:
import os, numpy as np, nibabel as nib, pandas as pd
from pathlib import Path
from tqdm import tqdm

# ── Input paths ─────────────────────────────────────────────────────────────
root_dir = r"------ INSERT PATH HERE ------"
pred_dir = os.path.join(root_dir, "Pred")

# One prediction folder per fissure model.
dirs = {
    "LOF": os.path.join(pred_dir, "pred_folder_LOF134_best5000"),
    "ROF": os.path.join(pred_dir, "pred_folder_ROF134_best5000"),
    "RHF": os.path.join(pred_dir, "pred_folder_RHF134_best5000"),
}
# Lobe masks (output folder name of the island post-processing step).
lobe_dir = os.path.join(pred_dir, "pred_folder_lobes134_best25003dEdited_2500")
out_xls = os.path.join(pred_dir, "fissure_completeness_all_columnwise.xlsx")

# ── Algorithm params ────────────────────────────────────────────────────────
z_window = 5    # half-width of the window searched for fissure voxels
max_search = 4  # how far along a column the other lobe label may be
max_gap = 5     # largest allowed jump of the border between neighbouring columns
# Lobe label pair (A, B) that meets at each junction.
junctions = {
    "LOF": (4, 5),
    "RHF": (1, 2),
    "ROF1": (1, 3),
    "ROF2": (2, 3),
}

def get_covered_and_total(fiss, lob, A, B):
    """Count A/B lobe-border positions and how many have fissure voxels nearby.

    The lobe volume is scanned column by column along its last axis. In each
    column that contains both labels, up to two border positions are taken:
    the lowest-index A voxel that has a B voxel within ``max_search`` indices,
    and the highest-index B voxel that has an A voxel within ``max_search``.
    A position is only accepted if it lies within ``max_gap`` of the
    previously accepted position of the same kind in the same slice
    (otherwise the tracker is reset). An accepted position counts as covered
    when ``fiss`` is non-zero anywhere within ``z_window`` indices of it in
    the same column.

    Uses the module-level ``z_window``, ``max_search`` and ``max_gap``.

    Args:
        fiss: Fissure mask, indexed like ``lob``.
        lob: 3D lobe label volume.
        A, B: The two lobe labels whose shared border is scored.

    Returns:
        Tuple ``(covered, total)`` of position counts.
    """
    n_rows, n_slices, n_depth = lob.shape
    n_covered = 0
    n_border = 0

    for y in range(n_slices):
        lobe_plane = lob[:, y, :]
        fissure_plane = fiss[:, y, :]
        # Last accepted border position per kind; reset for every slice.
        last_a = None
        last_b = None

        for x in range(n_rows):
            column = lobe_plane[x, :]
            if not (A in column and B in column):
                continue

            a_near_b = [
                z for z in np.where(column == A)[0]
                if np.any(column[max(z - max_search, 0): z + max_search + 1] == B)
            ]
            b_near_a = [
                z for z in np.where(column == B)[0]
                if np.any(column[max(z - max_search, 0): z + max_search + 1] == A)
            ]

            accepted = []
            if a_near_b:
                z_a = min(a_near_b)
                if last_a is not None and abs(z_a - last_a) > max_gap:
                    last_a = None  # jump too large: drop it and restart tracking
                else:
                    accepted.append(z_a)
                    last_a = z_a
            if b_near_a:
                z_b = max(b_near_a)
                if last_b is not None and abs(z_b - last_b) > max_gap:
                    last_b = None
                else:
                    accepted.append(z_b)
                    last_b = z_b

            for z in accepted:
                n_border += 1
                window = range(z - z_window, z + z_window + 1)
                if any(fissure_plane[x, zz] for zz in window if 0 <= zz < n_depth):
                    n_covered += 1

    return n_covered, n_border

def strip_nii(name: str) -> str:
    """Reduce a NIfTI file name to its case identifier.

    Removes every ".nii.gz" and then every ".nii" substring, and keeps only
    the text before the first "_postprocessed".
    """
    without_ext = name.replace(".nii.gz", "")
    without_ext = without_ext.replace(".nii", "")
    identifier, _, _ = without_ext.partition("_postprocessed")
    return identifier

def _find_nifti(folder, stem):
    """Return the first existing of ``<stem>.nii.gz`` / ``<stem>.nii`` in *folder*, else None."""
    for ext in (".nii.gz", ".nii"):
        candidate = Path(folder) / f"{stem}{ext}"
        if candidate.exists():
            return candidate
    return None


def _load_binary_mask(path):
    """Load a NIfTI volume and binarise it: every value > 0 becomes 1."""
    return (nib.load(path).get_fdata() > 0).astype(np.uint8)


def _percent(cov, tot):
    """Covered share in percent; NaN when there was nothing to score."""
    return np.nan if tot == 0 else 100.0 * cov / tot


# Collect every MRN that has at least one fissure prediction file.
all_mrns = set()
for fdir in dirs.values():
    all_mrns.update(strip_nii(p.name) for p in Path(fdir).rglob("*.nii*"))

print(f"🔍 Found {len(all_mrns)} unique MRNs across all fissure predictions")

records = []

for mrn in tqdm(sorted(all_mrns), desc="Computing all completeness values"):
    print(f"\n📋 Processing MRN: {mrn}")

    lobepath = _find_nifti(lobe_dir, mrn)
    if lobepath is None:
        print(f"   ❌ Lobe mask missing for {mrn} - skipped")
        continue

    lob = nib.load(lobepath).get_fdata().astype(np.uint8)
    entry = {"MRN": mrn}
    cov_tot_sum = [0, 0]  # [covered, total] pooled over RHF + ROF2 -> "RML1"
    rul_cov_tot = [0, 0]  # [covered, total] pooled over RHF + ROF1 -> "RUL"

    # LOF
    fpath = _find_nifti(dirs["LOF"], mrn)
    if fpath is None:
        entry["LOF"] = np.nan
        print(f"   ❌ LOF prediction missing")
    else:
        fiss = _load_binary_mask(fpath)
        cov, tot = get_covered_and_total(fiss, lob, *junctions["LOF"])
        entry["LOF"] = _percent(cov, tot)
        print(f"   ✅ LOF: {entry['LOF']:.1f}% ({cov}/{tot})")

    # RHF (contributes to both pooled scores)
    fpath = _find_nifti(dirs["RHF"], mrn)
    if fpath is None:
        entry["RHF"] = np.nan
        print(f"   ❌ RHF prediction missing")
    else:
        fiss = _load_binary_mask(fpath)
        cov, tot = get_covered_and_total(fiss, lob, *junctions["RHF"])
        entry["RHF"] = _percent(cov, tot)
        cov_tot_sum[0] += cov
        cov_tot_sum[1] += tot
        rul_cov_tot[0] += cov
        rul_cov_tot[1] += tot
        print(f"   ✅ RHF: {entry['RHF']:.1f}% ({cov}/{tot})")

    # ROF: scored separately on the 1-3 and 2-3 borders
    fpath = _find_nifti(dirs["ROF"], mrn)
    if fpath is None:
        entry["ROF_combined"] = np.nan
        print(f"   ❌ ROF prediction missing")
    else:
        fiss = _load_binary_mask(fpath)
        cov1, tot1 = get_covered_and_total(fiss, lob, *junctions["ROF1"])
        rul_cov_tot[0] += cov1
        rul_cov_tot[1] += tot1
        print(f"   📊 ROF1 (1↔3): ({cov1}/{tot1})")
        cov2, tot2 = get_covered_and_total(fiss, lob, *junctions["ROF2"])
        print(f"   📊 ROF2 (2↔3): ({cov2}/{tot2})")
        rof_total_cov = cov1 + cov2
        rof_total_tot = tot1 + tot2
        entry["ROF_combined"] = _percent(rof_total_cov, rof_total_tot)
        print(f"   ✅ ROF Combined: {entry['ROF_combined']:.1f}% ({rof_total_cov}/{rof_total_tot})")
        cov_tot_sum[0] += cov2
        cov_tot_sum[1] += tot2

    entry["RML1"] = _percent(cov_tot_sum[0], cov_tot_sum[1])
    print(f"   ✅ RML1 (RHF+ROF2): {entry['RML1']:.1f}% ({cov_tot_sum[0]}/{cov_tot_sum[1]})")

    entry["RUL"] = _percent(rul_cov_tot[0], rul_cov_tot[1])
    print(f"   ✅ RUL (RHF+ROF1): {entry['RUL']:.1f}% ({rul_cov_tot[0]}/{rul_cov_tot[1]})")

    records.append(entry)

# Naming the columns explicitly keeps "MRN" present even when no case was
# processed, so the sort below cannot fail with a KeyError on an empty table.
result_columns = ["MRN", "LOF", "RHF", "ROF_combined", "RML1", "RUL"]
df = pd.DataFrame(records, columns=result_columns).sort_values("MRN").reset_index(drop=True)
df.to_excel(out_xls, index=False)
print(f"\n✅ Saved to {out_xls}")
print(f"📊 Processed {len(records)} MRNs total")


## Quality check of the predicted fissures vs lobe borders - StratX dataset

In [None]:
import os, numpy as np, nibabel as nib, pandas as pd
from pathlib import Path

# === Parameters ===
# A column is flagged when the predicted fissure is more than this many
# voxels away from every lobe-border voxel of the column.
threshold_fissure_far_from_border = 30
# A column is flagged when its border voxels span more than this many voxels.
threshold_multiple_borders = 30

# === Paths ===
root_dir = r"------ INSERT PATH HERE ------"
lobes_dir = os.path.join(root_dir, "pred_folder_lobes134_best25003dEdited_2500")
fissure_dirs = {
    "LOF": os.path.join(root_dir, "pred_folder_LOF134_best5000"),
    "ROF": os.path.join(root_dir, "pred_folder_ROF134_best5000"),
    "RHF": os.path.join(root_dir, "pred_folder_RHF134_best5000"),
}
out_xls = os.path.join(root_dir, "fissure_not_amenable_summary_final.xlsx")

# === Define fissure border pairs ===
# Each entry lists the lobe label pairs (A, B) whose shared border is checked.
fissure_defs = {
    "LOF": [(4, 5)],
    "ROF": [(1, 3), (2, 3)],
    "RHF": [(1, 2)],
    "RML": [(1, 2), (2, 3)],
    "RUL": [(1, 2), (1, 3)],
}

def strip_nii(name):
    """Return ``name`` with every ".nii.gz" and then every ".nii" substring removed."""
    for extension in (".nii.gz", ".nii"):
        name = name.replace(extension, "")
    return name

# === MRNs with all files present ===
def _mrns_in(folder):
    """Identifiers of all ``*.nii*`` files directly inside *folder*."""
    return {strip_nii(nii.name) for nii in Path(folder).glob("*.nii*")}


lobes_mrns = _mrns_in(lobes_dir)
fissure_mrns = {fissure: _mrns_in(folder) for fissure, folder in fissure_dirs.items()}
# Only cases that have a lobe mask and all three fissure predictions.
common_mrns = lobes_mrns.intersection(fissure_mrns["ROF"], fissure_mrns["RHF"], fissure_mrns["LOF"])

# === Processing loop ===
def _resolve_nifti(folder, mrn):
    """Return the path of ``<mrn>.nii.gz`` in *folder*, falling back to ``<mrn>.nii``."""
    gz_path = os.path.join(folder, f"{mrn}.nii.gz")
    if os.path.exists(gz_path):
        return gz_path
    return os.path.join(folder, f"{mrn}.nii")


all_results = []

for idx, mrn in enumerate(sorted(common_mrns), 1):
    print(f"\n🔄 [{idx}/{len(common_mrns)}] Processing MRN: {mrn}")
    lobes = nib.load(_resolve_nifti(lobes_dir, mrn)).get_fdata().astype(np.uint8)

    # The case list is gathered with "*.nii*", so fissure files may be
    # uncompressed as well; resolve them exactly like the lobe mask.
    fissures = {
        k: (nib.load(_resolve_nifti(d, mrn)).get_fdata() > 0).astype(np.uint8)
        for k, d in fissure_dirs.items()
    }

    H, D, W = lobes.shape
    mrn_result = {"MRN": mrn}

    for fissure_name, border_pairs in fissure_defs.items():
        print(f"   ➤ Fissure: {fissure_name}")
        total = 0          # columns containing both labels of a pair
        not_amenable = 0   # of those, columns flagged by either heuristic

        for (A, B) in border_pairs:
            # Fissure prediction source: the pooled RML/RUL groups borrow the
            # RHF prediction for the 1-2 border and the ROF prediction otherwise.
            if fissure_name in ["RML", "RUL"]:
                if (A, B) == (1, 2):
                    fissure_pred = fissures["RHF"]
                elif (A, B) == (2, 3) or (A, B) == (1, 3):
                    fissure_pred = fissures["ROF"]
                else:
                    raise ValueError(f"Unexpected lobe pair for {fissure_name} in MRN {mrn}: ({A}, {B})")
            else:
                fissure_pred = fissures[fissure_name]

            for y in range(D):
                for x in range(H):
                    line = lobes[x, y, :]
                    line_pred = fissure_pred[x, y, :]

                    if A not in line or B not in line:
                        continue
                    total += 1

                    # Positions where an A voxel is directly adjacent to a B voxel
                    # (or vice versa) along this column.
                    border_rows = []
                    for z in range(W):
                        if line[z] == A and np.any(line[max(0, z-1):z+2] == B):
                            border_rows.append(z)
                        elif line[z] == B and np.any(line[max(0, z-1):z+2] == A):
                            border_rows.append(z)

                    fissure_rows = np.where(line_pred > 0)[0]

                    # Heuristic 1: predicted fissure lies far from every border voxel
                    if len(fissure_rows) > 0 and len(border_rows) > 0:
                        min_dist = min([np.min(np.abs(fissure_rows - b)) for b in border_rows])
                        if min_dist > threshold_fissure_far_from_border:
                            not_amenable += 1
                            continue

                    # Heuristic 2: border voxels are spread over a long stretch
                    if len(border_rows) > 1 and (np.max(border_rows) - np.min(border_rows) > threshold_multiple_borders):
                        not_amenable += 1
                        continue

        percent = 100 * not_amenable / total if total else 0
        mrn_result[f"{fissure_name}_not_amenable"] = not_amenable
        mrn_result[f"{fissure_name}_total"] = total
        mrn_result[f"{fissure_name}_percent"] = round(percent, 2)
        print(f"     ⬅️ {fissure_name}: {not_amenable}/{total} not amenable ({percent:.2f}%)")

    all_results.append(mrn_result)

# === Format and Save as Excel ===
df = pd.DataFrame(all_results)
fissures_order = ["LOF", "RHF", "RML", "RUL", "ROF"]
metrics = ["not_amenable", "total", "percent"]
ordered_cols = ["MRN"] + [f"{f}_{m}" for f in fissures_order for m in metrics]
df = df.reindex(columns=ordered_cols)

df.to_excel(out_xls, index=False)
print(f"\n✅ Saved summary to: {out_xls}")



## Scatter plotting the completeness scores with Pearson's correlation coefficient and regression line (with 95% CI)

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import scipy.stats as stats

# === Load all columns from a single file ===
completeness_xlsx = r"------ INSERT PATH HERE ------\fissure_completeness_all_columnwise_S_2.xlsx"
df = pd.read_excel(completeness_xlsx)

# === Setup 2x2 plot ===
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 12))

# === Helper function with 95% CI shading (NO ICC) ===
def plot_and_stats(df, stratx_col, model_col, mrn_col, title, ax):
    """Scatter model vs. reference scores with a linear fit and its 95% band.

    Fits an ordinary least-squares line of ``model_col`` on ``stratx_col``,
    shades the 95% confidence band of the fitted mean, labels every point
    with its ``mrn_col`` value and prints Pearson's r and its p-value in the
    top-left corner of ``ax``.

    Args:
        df: Table holding the three columns; must not contain NaNs in them.
        stratx_col: Column plotted on the x axis (reference).
        model_col: Column plotted on the y axis (model).
        mrn_col: Column whose values annotate the points.
        title: Axes title.
        ax: Matplotlib axes to draw on.
    """
    x = df[stratx_col]  # reference
    y = df[model_col]   # model
    mrns = df[mrn_col]

    # Sort by x so the fitted line and the band are drawn left to right.
    order = np.argsort(x)
    xs = x.values[order]
    ys = y.values[order]

    design = xs.reshape(-1, 1)
    fitted = LinearRegression().fit(design, ys).predict(design)
    r2 = r2_score(ys, fitted)

    # Confidence band of the regression mean (t distribution, n - 2 dof).
    n = len(design)
    dof = n - 2
    resid_se = np.sqrt(np.sum((ys - fitted) ** 2) / dof)
    t_crit = stats.t.ppf(0.975, df=dof)
    x_bar = np.mean(xs)
    half_band = t_crit * resid_se * np.sqrt(1/n + (xs - x_bar)**2 / np.sum((xs - x_bar)**2))
    band_lo = fitted - half_band
    band_hi = fitted + half_band

    r, p_r = stats.pearsonr(x, y)

    ax.fill_between(xs, band_lo, band_hi, color='lightblue', alpha=0.3, label="95% CI")
    ax.plot(xs, fitted, 'r', lw=1.5, label=f"Linear Fit  $R^2$={r2:.2f}")
    ax.scatter(x, y, alpha=0.6)

    for x_pt, y_pt, tag in zip(x, y, mrns):
        ax.annotate(tag, (x_pt, y_pt), fontsize=7, alpha=.6)

    ax.set_title(title)
    ax.set_xlabel(f"{stratx_col} (StratX Reference) [%]")
    ax.set_ylabel(f"{model_col} (Model Prediction) [%]")
    ax.set_xlim((0, 100))
    ax.set_ylim((0, 100))
    ax.grid(True)
    ax.legend()

    corner_box = dict(facecolor="white", alpha=.8, edgecolor="none")
    ax.text(0.02, 0.98, f"$r$ = {r:.2f}  (p = {p_r:.3f})", transform=ax.transAxes,
            va="top", ha="left", fontsize=9, bbox=corner_box)

# === Coerce score columns to numeric, then drop incomplete rows ===
# Only the score columns are converted: the MRN identifier is left untouched,
# and unparsable scores become NaN so their rows are dropped instead of
# leaving a whole column as objects (errors='ignore' is deprecated in pandas).
score_cols = ["LOF", "LLL", "ROF", "RLL", "RML1", "RML", "RUL1", "RUL"]
df[score_cols] = df[score_cols].apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["MRN"] + score_cols).copy()

# === Plot comparisons (switched subplot 2 and 3) ===
plot_and_stats(df, "LLL", "LOF", "MRN", "LOF (Model) vs LLL (StratX)", axes[0, 0])
plot_and_stats(df, "RLL", "ROF", "MRN", "ROF (Model) vs RLL (StratX)", axes[0, 1])
plot_and_stats(df, "RUL", "RUL1", "MRN", "RUL (Model) vs RUL (StratX)", axes[1, 0])
plot_and_stats(df, "RML", "RML1", "MRN", "RML (Model) vs RML (StratX)", axes[1, 1])

# === Final layout ===
plt.tight_layout()
plt.show()


## Scatter plotting the completeness scores with Spearman's correlation coefficient

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# === Load all columns from a single file ===
df = pd.read_excel(r"------ INSERT PATH HERE ------\fissure_completeness_all_columnwise_S_2.xlsx")

# === Ensure numeric scores, then drop incomplete rows ===
# Only the score columns are converted: the MRN identifier is left untouched,
# and unparsable scores become NaN so their rows are dropped instead of
# leaving a whole column as objects (errors='ignore' is deprecated in pandas).
score_cols = ["LOF", "LLL", "ROF", "RLL", "RML1", "RML", "RUL1", "RUL"]
df[score_cols] = df[score_cols].apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["MRN"] + score_cols).copy()

# === Setup 2x2 plot ===
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# === Helper function using Spearman and LOWESS ===
def plot_and_stats(df, stratx_col, model_col, title, ax):
    """Scatter model vs. reference scores with a LOWESS trend and Spearman's rho.

    Args:
        df: Table holding both columns.
        stratx_col: Column plotted on the x axis (reference).
        model_col: Column plotted on the y axis (model).
        title: Axes title.
        ax: Matplotlib axes to draw on.
    """
    reference = df[stratx_col]
    predicted = df[model_col]

    rho, p_s = stats.spearmanr(reference, predicted)

    # Points first, then the smoothed trend line on top.
    sns.scatterplot(x=reference, y=predicted, ax=ax, alpha=0.6)
    sns.regplot(x=reference, y=predicted, ax=ax, lowess=True, scatter=False,
                color='red', label="LOWESS Trend")

    ax.set_title(title)
    ax.set_xlabel(f"{stratx_col} (StratX Reference) [%]")
    ax.set_ylabel(f"{model_col} (Model Prediction) [%]")
    ax.set_xlim((0, 100))
    ax.set_ylim((0, 100))
    ax.grid(True)
    ax.legend()

    # Correlation summary in the top-left corner of the axes.
    corner_box = dict(facecolor="white", alpha=.8, edgecolor="none")
    ax.text(0.02, 0.98, f"$\\rho$ = {rho:.2f}  (p = {p_s:.3f})", transform=ax.transAxes,
            va="top", ha="left", fontsize=9, bbox=corner_box)

# === Generate plots ===
# (reference column, model column, panel title, target axes)
spearman_panels = [
    ("LLL", "LOF", "A. Left oblique fissure", axes[0, 0]),
    ("RLL", "ROF", "B. Right oblique fissure", axes[0, 1]),
    ("RUL", "RUL1", "C. Fissures around Right upper lobe", axes[1, 0]),
    ("RML", "RML1", "D. Fissures around Right middle lobe", axes[1, 1]),
]
for panel_ref, panel_model, panel_title, panel_ax in spearman_panels:
    plot_and_stats(df, panel_ref, panel_model, panel_title, panel_ax)

# === Final layout ===
plt.tight_layout()
# Save at 300 DPI before showing the figure.
plt.savefig(r"------ INSERT PATH HERE ------\Figure3.png", dpi=300, bbox_inches='tight')
plt.show()


## Model-derived completeness scores correlation and absolute agreement with StratX scores - Spearman correlation coefficient and ICC

In [None]:
import pandas as pd
import pingouin as pg
from scipy.stats import spearmanr
import numpy as np
from scipy import stats

# === Load Data ===
df = pd.read_excel(r"------ INSERT PATH HERE ------\fissure_completeness_all_columnwise_S_2.xlsx")
# Only the score columns are converted: the MRN identifier is left untouched,
# and unparsable scores become NaN so their rows are dropped instead of
# leaving a whole column as objects (errors='ignore' is deprecated in pandas).
score_cols = ["LOF", "LLL", "ROF", "RLL", "RML1", "RML", "RUL1", "RUL"]
df[score_cols] = df[score_cols].apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["MRN"] + score_cols).copy()

# === Function to compute statistics ===
def compute_metrics(df, model_col, stratx_col, id_col='MRN'):
    """Summarise agreement between a model score and its reference score.

    Computes the "ICC3" row of ``pingouin.intraclass_corr`` (model and
    reference treated as the two raters, ``id_col`` as the target) and
    Spearman's rho with a Fisher-z based 95% interval.

    Args:
        df: Table holding ``id_col``, ``model_col`` and ``stratx_col``.
        model_col: Column with the model-derived scores.
        stratx_col: Column with the reference scores.
        id_col: Column identifying the subject.

    Returns:
        Dict of formatted strings: comparison name, Spearman rho with CI,
        Spearman p, ICC with CI and ICC p.
    """
    paired = df[[id_col, model_col, stratx_col]].dropna()

    # pingouin expects long format: one row per (subject, rater) rating.
    long_form = paired.melt(
        id_vars=id_col,
        value_vars=[model_col, stratx_col],
        var_name='rater',
        value_name='score',
    )
    rater_names = {model_col: "Model", stratx_col: "StratX"}
    long_form['rater'] = long_form['rater'].map(rater_names)
    icc_table = pg.intraclass_corr(data=long_form, targets=id_col, raters='rater', ratings='score')
    icc3 = icc_table[icc_table["Type"] == "ICC3"].iloc[0]
    icc_lo, icc_hi = icc3['CI95%'][0], icc3['CI95%'][1]

    # Spearman rho with a 95% CI from the Fisher z transform.
    # NOTE(review): 1/sqrt(n - 3) is the standard error usually quoted for
    # Pearson's r; confirm it is the intended approximation for Spearman.
    model_scores = paired[model_col]
    reference_scores = paired[stratx_col]
    rho, p = spearmanr(model_scores, reference_scores)
    n = len(model_scores)
    if abs(rho) != 1:
        half_width = 1.96 * (1.0 / np.sqrt(n - 3))
        z = np.arctanh(rho)
        lo, hi = np.tanh(z - half_width), np.tanh(z + half_width)
    else:
        # arctanh(+/-1) is infinite; report a degenerate interval instead.
        lo = hi = rho

    return {
        "Comparison": f"{model_col} vs {stratx_col}",
        "Spearman ρ (95% CI)": f"{rho:.2f} ({lo:.2f}–{hi:.2f})",
        "Spearman p": f"{p:.3g}",
        "ICC(3,1) (95% CI)": f"{icc3['ICC']:.2f} ({icc_lo:.2f}–{icc_hi:.2f})",
        "ICC p": f"{icc3['pval']:.3g}"
    }

# === Comparisons in specified order ===
# (model column, reference column)
pairs = [
    ('LOF', 'LLL'),
    ('ROF', 'RLL'),
    ('RUL1', 'RUL'),
    ('RML1', 'RML')
]
results = [compute_metrics(df, model_col, stratx_col) for model_col, stratx_col in pairs]

# === Create and display results table ===
results_df = pd.DataFrame(results)
print(results_df)


## Determining the optimal thresholds to predict the fissure completeness (>=95%) based on StratX values (with 95% CI)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve, auc
from tqdm import tqdm

# === Load and preprocess data ===
df = pd.read_excel(r"------ INSERT PATH HERE ------\fissure_completeness_all_columnwise_S_2.xlsx")
# Only the score columns are converted: the MRN identifier is left untouched,
# and unparsable scores become NaN so their rows are dropped instead of
# leaving a whole column as objects (errors='ignore' is deprecated in pandas).
score_cols = ["LOF", "LLL", "ROF", "RLL", "RML1", "RML", "RUL1", "RUL"]
df[score_cols] = df[score_cols].apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["MRN"] + score_cols).copy()

# === Fissure model/reference pairs ===
# (model column, reference column)
pairs = [
    ('LOF', 'LLL'),
    ('ROF', 'RLL'),
    ('RUL1', 'RUL'),
    ('RML1', 'RML')
]

# === Bootstrap helper ===
def bootstrap_metrics(y_true, y_scores, n_bootstraps=1000, seed=42):
    """Bootstrap the Youden-optimal ROC operating point and the AUC.

    Each resample draws ``len(y_true)`` cases with replacement, computes the
    ROC curve and records the threshold maximising Youden's J (TPR - FPR)
    together with its sensitivity, specificity, J and the AUC. Resamples
    that contain only one class are skipped because their ROC curve is
    undefined and would inject NaNs into every summary.

    Args:
        y_true: Binary ground-truth labels (array-like).
        y_scores: Scores, higher meaning more likely positive (array-like).
        n_bootstraps: Number of resamples to draw.
        seed: Seed of the resampling random generator.

    Returns:
        Dict mapping each metric name to ``"mean (2.5th–97.5th percentile)"``
        over the valid resamples; all values are NaN if no resample
        contained both classes.
    """
    y_true = np.asarray(y_true)
    y_scores = np.asarray(y_scores)
    n_samples = len(y_true)
    rng = np.random.RandomState(seed)
    stats_list = []

    for _ in range(n_bootstraps):
        indices = rng.choice(n_samples, n_samples, replace=True)
        y_true_b = y_true[indices]
        if np.unique(y_true_b).size < 2:
            continue  # single-class resample: TPR or FPR is undefined
        y_scores_b = y_scores[indices]

        fpr, tpr, thresholds = roc_curve(y_true_b, y_scores_b)
        youden = tpr - fpr
        max_idx = np.argmax(youden)

        stats_list.append([
            thresholds[max_idx],   # best threshold
            tpr[max_idx],          # sensitivity
            1 - fpr[max_idx],      # specificity
            youden[max_idx],       # Youden index
            auc(fpr, tpr),         # AUC
        ])

    if not stats_list:
        # No usable resample (e.g. y_true has one class): report NaNs.
        stats_list.append([np.nan] * 5)

    stats_arr = np.array(stats_list)
    means = np.mean(stats_arr, axis=0)
    lower = np.percentile(stats_arr, 2.5, axis=0)
    upper = np.percentile(stats_arr, 97.5, axis=0)

    return {
        "Best Threshold (95% CI)": f"{means[0]:.2f} ({lower[0]:.2f}–{upper[0]:.2f})",
        "Sensitivity (95% CI)": f"{means[1]:.3f} ({lower[1]:.3f}–{upper[1]:.3f})",
        "Specificity (95% CI)": f"{means[2]:.3f} ({lower[2]:.3f}–{upper[2]:.3f})",
        "Youden Index (95% CI)": f"{means[3]:.3f} ({lower[3]:.3f}–{upper[3]:.3f})",
        "AUC (95% CI)": f"{means[4]:.3f} ({lower[4]:.3f}–{upper[4]:.3f})"
    }

# === Run for individual fissures ===
results = []
for model_col, stratx_col in tqdm(pairs, desc="Bootstrapping Fissures"):
    df_pair = df[[model_col, stratx_col]].dropna()
    # Reference completeness >= 95% defines the positive class.
    y_true = (df_pair[stratx_col] >= 95).astype(int).values
    y_scores = df_pair[model_col].values
    metrics = bootstrap_metrics(y_true, y_scores)
    metrics["Fissure"] = f"{model_col} vs {stratx_col}"
    results.append(metrics)

# === Combined analysis ===
# Pool all four comparisons; the frames are collected first and concatenated
# once instead of growing a DataFrame from an empty one inside the loop.
pooled_frames = []
for model_col, stratx_col in pairs:
    df_pair = df[[model_col, stratx_col]].dropna()
    pooled_frames.append(pd.DataFrame({
        "model_score": df_pair[model_col],
        "stratx_binary": (df_pair[stratx_col] >= 95).astype(int)
    }))
combined = pd.concat(pooled_frames, ignore_index=True)

metrics_combined = bootstrap_metrics(
    combined["stratx_binary"].values, combined["model_score"].values
)
metrics_combined["Fissure"] = "Combined"
results.append(metrics_combined)

# === Final results table ===
results_df = pd.DataFrame(results)
cols_order = ["Fissure", "Best Threshold (95% CI)", "Sensitivity (95% CI)",
              "Specificity (95% CI)", "Youden Index (95% CI)", "AUC (95% CI)"]
results_df = results_df[cols_order]

# === Display in notebook ===
print(results_df.to_string(index=False))


## ROC for fissure completeness - StratX 95% threshold based prediction of Complete Fissure

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.utils import resample

# === Load unified file ===
df = pd.read_excel(r"------ INSERT PATH HERE ------\fissure_completeness_all_columnwise_S_2.xlsx")

# === Ensure numeric scores, then drop rows missing a required column ===
# Only the score columns are converted: the MRN identifier is left untouched,
# and unparsable scores become NaN so their rows are dropped instead of
# leaving a whole column as objects (errors='ignore' is deprecated in pandas).
score_cols = ["LOF", "LLL", "ROF", "RLL", "RML1", "RML", "RUL1", "RUL"]
df[score_cols] = df[score_cols].apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["MRN"] + score_cols).copy()

# === Setup 2x2 plot ===
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# === Helper function for ROC with shaded CI ===
def plot_roc_with_ci(df, stratx_col, model_col, title, ax, n_bootstraps=1000, seed=42):
    """Draw the ROC curve of ``model_col`` for predicting ``stratx_col`` >= 95.

    The curve and AUC come from the full data. A bootstrap supplies the
    percentile 95% interval of the AUC and a shaded band (mean +/- 1.96 SD of
    the resampled TPR, interpolated on a common FPR grid and clipped to
    [0, 1]). Resamples containing a single class are skipped.

    Args:
        df: Table holding both columns.
        stratx_col: Reference column; values >= 95 form the positive class.
        model_col: Column used as the score.
        title: Axes title.
        ax: Matplotlib axes to draw on.
        n_bootstraps: Number of resamples to draw.
        seed: Seed of the resampling random generator.
    """
    rng = np.random.RandomState(seed)
    y_true = (df[stratx_col] >= 95).astype(int).values
    y_score = df[model_col].values
    n_cases = len(y_score)

    # Point estimate on the full data.
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    # Bootstrap on a fixed FPR grid so the resampled curves can be averaged.
    mean_fpr = np.linspace(0, 1, 100)
    boot_tprs = []
    boot_aucs = []
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_cases, n_cases)
        resampled_truth = y_true[indices]
        if np.unique(resampled_truth).size < 2:
            continue
        fpr_b, tpr_b, _ = roc_curve(resampled_truth, y_score[indices])
        boot_aucs.append(auc(fpr_b, tpr_b))

        tpr_on_grid = np.interp(mean_fpr, fpr_b, tpr_b)
        tpr_on_grid[0] = 0.0  # force every curve through the origin
        boot_tprs.append(tpr_on_grid)

    tpr_stack = np.array(boot_tprs)
    mean_tpr = tpr_stack.mean(axis=0)
    spread = 1.96 * tpr_stack.std(axis=0)
    lower_tpr = np.maximum(mean_tpr - spread, 0)
    upper_tpr = np.minimum(mean_tpr + spread, 1)

    lower_auc = np.percentile(boot_aucs, 2.5)
    upper_auc = np.percentile(boot_aucs, 97.5)

    curve_label = f"AUC = {roc_auc:.2f} (95% CI: {lower_auc:.2f}–{upper_auc:.2f})"
    ax.plot(fpr, tpr, color='steelblue', lw=2, label=curve_label)
    ax.fill_between(mean_fpr, lower_tpr, upper_tpr, color='steelblue', alpha=0.2, label="95% CI")
    ax.plot([0, 1], [0, 1], "k--", lw=1)  # chance line

    ax.set_title(title)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_xlim((0, 1))
    ax.set_ylim((0, 1))
    ax.grid(True)
    ax.legend(loc="lower right")

# === Plot (order: LOF, ROF, RUL, RML) ===
# (reference column, model column, panel title, target axes)
roc_panels = [
    ("LLL", "LOF", "A. Left oblique fissure", axes[0, 0]),
    ("RLL", "ROF", "B. Right oblique fissure ", axes[0, 1]),
    ("RUL", "RUL1", "C. Fissures around right upper lobe", axes[1, 0]),
    ("RML", "RML1", "D. Fissures around right middle lobe", axes[1, 1]),
]
for panel_ref, panel_model, panel_title, panel_ax in roc_panels:
    plot_roc_with_ci(df, panel_ref, panel_model, panel_title, panel_ax)

# === Final layout ===
plt.tight_layout()
# Save at 300 DPI before showing the figure.
plt.savefig(r"------ INSERT PATH HERE ------\Figure4.png", dpi=300, bbox_inches='tight')
plt.show()
