<a href="https://colab.research.google.com/github/Kenny625819/Applied-Data-Science/blob/main/MRI_arial_fig2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# =============================================================================
# Install & Activate Arial Font in Google Colab (Recommended Stable Version)
# =============================================================================
# Purpose: ask the user to upload an Arial TrueType file, move it into a font
# directory, register it with Matplotlib, and make it the default font family
# for every figure drawn afterwards.
import os
import shutil

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
from google.colab import files

print("Please upload Arial.ttf (Regular).")
uploaded = files.upload()

# Fail with a clear message instead of an IndexError further down when the
# upload produced no file.
if not uploaded:
    raise RuntimeError(
        "No font file was uploaded. Re-run this cell and select Arial.ttf."
    )

# 1. Directory the font file is stored in
font_dir = "/usr/local/share/fonts/truetype"
os.makedirs(font_dir, exist_ok=True)

# 2. Take the (first) uploaded file name and move that file into place
uploaded_name = next(iter(uploaded))
font_path = os.path.join(font_dir, "Arial.ttf")
shutil.move(uploaded_name, font_path)

# 3. Register the font with Matplotlib
fm.fontManager.addfont(font_path)

# 4. Render every figure in Arial.
#    plt.rcParams and mpl.rcParams are the same object, so one assignment per
#    key is sufficient.
plt.rcParams["font.family"] = "Arial"
plt.rcParams["axes.unicode_minus"] = False

print("✓ Arial installed and activated successfully!")


Please upload Arial.ttf (Regular).


Saving arial.ttf to arial.ttf
✓ Arial installed and activated successfully!


In [2]:
# Apply one point size to every text element so all figure text is uniform.
BASE_FONT_SIZE = 20

for size_key in (
    'font.size',
    'axes.titlesize',
    'axes.labelsize',
    'xtick.labelsize',
    'ytick.labelsize',
    'legend.fontsize',
    'figure.titlesize',
):
    plt.rcParams[size_key] = BASE_FONT_SIZE


In [4]:
###STEP3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Figure 2 Generation: ESCC CNN Performance Evaluation
----------------------------------------------------

This script computes confusion matrices and ROC curves for out-of-fold (OOF)
ESCC predictions and generates Figure 2 of the manuscript. Specifically:

1. Loads CNN out-of-fold predictions:
       - predicted ESCC labels
       - predicted probabilities per class
       - high-grade probability ("prob_2" + "prob_3")

2. Merges predictions with the consensus ESCC labels from expert raters.

3. Computes:
       • 4-class confusion matrix
       • Binary ROC curve (high-grade ESCC = Bilsky 2–3)
       • Binary AUC with bootstrap 95% CI
       • Macro-AUC (4-class probability-based) with bootstrap 95% CI

4. Generates publication-ready Figure 2:
       A. Confusion Matrix (blue colormap)
       B. ROC curve (without CIs in the legend, as required by the manuscript)

Outputs:
    - Console: AUC values with 95% CIs
    - PNG: "Figure2_ESCC_with_macroCI_blueUnified.png"
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, roc_curve, auc, roc_auc_score
from sklearn.utils import resample
from matplotlib.colors import LinearSegmentedColormap
from sklearn.preprocessing import label_binarize


# ============================================================
# Load prediction and ground-truth data
# ============================================================
PRED_CSV = "escc_oof_predictions.csv"
GT_EXCEL = "ESCC_3_with_consensus.xlsx"

print(f"Loading OOF predictions: {PRED_CSV}")
df_pred = pd.read_csv(PRED_CSV)

print(f"Loading ground-truth consensus labels: {GT_EXCEL}")
df_gt = pd.read_excel(GT_EXCEL)

# Normalize filename (e.g., "1.png" → "1") so that it can be used as the
# merge key against the ground-truth table.
df_pred["filename"] = (
    df_pred["filename"]
    .astype(str)
    .str.replace(r"\.[A-Za-z0-9]+$", "", regex=True)
)
df_gt["filename"] = df_gt["filename"].astype(str)

# Merge predictions with ground truth. The inner join keeps only filenames
# present in both tables.
df_merged = df_pred.merge(
    df_gt[["filename", "ESCC_consensus"]],
    on="filename",
    how="inner"
)

# Valid ESCC classes used for CNN classification
valid_labels = ["1b", "1c", "2", "3"]

# Keep rows whose predicted AND consensus labels are both valid classes.
# .copy() makes `df` an independent frame, so the column assignments that
# follow cannot raise pandas' SettingWithCopyWarning.
has_valid_labels = (
    df_merged["pred_label"].isin(valid_labels)
    & df_merged["ESCC_consensus"].isin(valid_labels)
)
df = df_merged.loc[has_valid_labels].copy()

# An empty frame would otherwise only surface later as an obscure error in
# the metric computations.
if df.empty:
    raise ValueError(
        "No rows left after merging predictions with consensus labels. "
        "Check that 'filename' values match between the two files and that "
        f"labels are among {valid_labels}."
    )

df["true_label"] = df["ESCC_consensus"]


# ============================================================
# 1. Confusion Matrix (4-class)
# ============================================================
# Rows are true labels and columns are predicted labels, in valid_labels order.
cm = confusion_matrix(df["true_label"], df["pred_label"], labels=valid_labels)


# ============================================================
# Bootstrap settings shared by both AUC confidence intervals
# ============================================================
N_BOOTSTRAP = 1000      # number of bootstrap replicates per confidence interval
BOOTSTRAP_SEED = 42     # fixed seed so the reported CIs are reproducible
PROB_COLUMNS = ["prob_1b", "prob_1c", "prob_2", "prob_3"]  # one per valid label

# A single seeded RandomState is handed to every resample() call. Each call
# advances the generator, so replicates differ from one another while the
# whole sequence is identical on every run of this cell.
bootstrap_rng = np.random.RandomState(BOOTSTRAP_SEED)


# ============================================================
# 2. Binary ROC Curve (High-grade = Bilsky 2 or 3)
# ============================================================
df["binary_true"] = df["true_label"].isin(["2", "3"]).astype(int)
df["binary_prob"] = df["high_prob"].astype(float)

fpr, tpr, _ = roc_curve(df["binary_true"], df["binary_prob"])
auc_point = auc(fpr, tpr)

# Bootstrap CI for binary AUC
boot = []
for _rep in range(N_BOOTSTRAP):
    sample = resample(df, random_state=bootstrap_rng)
    # AUC is undefined when a replicate holds only one of the two classes.
    if sample["binary_true"].nunique() < 2:
        continue
    fpr_b, tpr_b, _ = roc_curve(sample["binary_true"], sample["binary_prob"])
    boot.append(auc(fpr_b, tpr_b))

bin_ci_l, bin_ci_u = np.percentile(boot, [2.5, 97.5])


# ============================================================
# 3. Macro-AUC (4-class) + CI
# ============================================================
# One-vs-rest indicator matrix with one column per entry of valid_labels.
y_true_bin = label_binarize(df["true_label"], classes=valid_labels)
y_score_bin = df[PROB_COLUMNS].values

macro_auc_point = roc_auc_score(y_true_bin, y_score_bin, average="macro")

macro_boot = []
for _rep in range(N_BOOTSTRAP):
    sample = resample(df, random_state=bootstrap_rng)
    # A per-class AUC is undefined when that class is absent from the
    # replicate, so such replicates are skipped. This explicit check replaces
    # a blanket `except Exception`, which would also have hidden unrelated
    # errors (e.g. a missing probability column).
    if sample["true_label"].nunique() < len(valid_labels):
        continue
    ys = label_binarize(sample["true_label"], classes=valid_labels)
    ps = sample[PROB_COLUMNS].values
    macro_boot.append(roc_auc_score(ys, ps, average="macro"))

macro_ci_l, macro_ci_u = np.percentile(macro_boot, [2.5, 97.5])


# Print for inclusion in manuscript
print("\n===== AUCs for Manuscript Text =====")
print(f"Binary AUC (2–3 vs 1b/1c): {auc_point:.3f} "
      f"(95% CI {bin_ci_l:.3f}–{bin_ci_u:.3f})")
print(f"Macro AUC (4-class): {macro_auc_point:.3f} "
      f"(95% CI {macro_ci_l:.3f}–{macro_ci_u:.3f})")


# ============================================================
# Custom blue colormap (for visual consistency across manuscript)
# ============================================================
# Two-stop gradient running from a pale blue to the blue of the ROC line.
roc_blue_cmap = LinearSegmentedColormap.from_list("roc_blue", ["#e6f2ff", "#1f77b4"])


# ============================================================
# 4. Figure 2 Plotting
# ============================================================
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
label_font = {"fontsize": 20}  # shared text size for titles, labels and ticks

# -----------------------------
# A. Confusion Matrix
# -----------------------------
ax = axes[0]
im = ax.imshow(cm, cmap=roc_blue_cmap)

ax.set_title("Confusion Matrix", **label_font)
ax.set_xlabel("Predicted label", **label_font)
ax.set_ylabel("True label", **label_font)

n_classes = len(valid_labels)
tick_positions = np.arange(n_classes)
ax.set_xticks(tick_positions)
ax.set_xticklabels(valid_labels, **label_font)
ax.set_yticks(tick_positions)
ax.set_yticklabels(valid_labels, **label_font)

# Write each count into its cell; cells darker than the colormap midpoint get
# white text so the number stays legible.
dark_cell_threshold = cm.max() * 0.5
for row, col in np.ndindex(n_classes, n_classes):
    count = cm[row, col]
    text_color = "white" if count > dark_cell_threshold else "black"
    ax.text(
        col, row, count,
        ha="center", va="center",
        color=text_color, **label_font
    )

fig.colorbar(im, ax=ax)

# -----------------------------
# B. Binary ROC Curve (No CI in legend)
# -----------------------------
ax2 = axes[1]
ax2.plot(fpr, tpr, color="#1f77b4", linewidth=2, label=f"AUC = {auc_point:.3f}")

# Diagonal chance line for reference
ax2.plot([0, 1], [0, 1], color="gray", linestyle="--", linewidth=1)

ax2.set_title("High-grade ESCC Detection", **label_font)
ax2.set_xlabel("1 - Specificity", **label_font)
ax2.set_ylabel("Sensitivity", **label_font)
ax2.tick_params(labelsize=20)
ax2.legend(fontsize=15, loc="lower right")

# Write the figure to disk, then close it.
output_path = "Figure2_ESCC_with_macroCI_blueUnified.png"
fig.tight_layout()
fig.savefig(output_path, dpi=600)
plt.close(fig)

print(f"\n✓ Saved Figure 2 to: {output_path}")


Loading OOF predictions: escc_oof_predictions.csv
Loading ground-truth consensus labels: ESCC_3_with_consensus.xlsx

===== AUCs for Manuscript Text =====
Binary AUC (2–3 vs 1b/1c): 0.744 (95% CI 0.640–0.838)
Macro AUC (4-class): 0.606 (95% CI 0.518–0.682)

✓ Saved Figure 2 to: Figure2_ESCC_with_macroCI_blueUnified.png
