# THIRU Manuscript — Analysis and Figure Generation
## Benchmarking Deep Learning Methods for Mitochondria and Membrane Segmentation in Retinal EM
Generates four publication-quality figures for IOVS submission; each is saved to `figures/` as both PNG and TIFF at 300 dpi.


In [1]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import tifffile
from pathlib import Path
from PIL import Image

# Input and output locations (relative to the notebook's working directory)
DATA_DIR = Path("data")
FIG_DIR = Path("figures")
FIG_DIR.mkdir(exist_ok=True)

# Matplotlib defaults shared by every figure in this notebook
PUBLICATION_RC = {
    # Typography
    "font.family": "serif",
    "font.serif": ["Times New Roman", "DejaVu Serif"],
    "font.size": 10,
    "axes.labelsize": 11,
    "axes.titlesize": 12,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
    "legend.fontsize": 8,
    # Resolution and export margins
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.05,
}
plt.rcParams.update(PUBLICATION_RC)

# Overlay/bar colors as 0-1 RGB tuples (consistent with webapp)
MITO_COLOR = tuple(channel / 255 for channel in (0, 200, 0))          # green
MEMBRANE_COLOR = tuple(channel / 255 for channel in (0, 100, 255))    # blue

# Result tables: two live directly under DATA_DIR, two under the
# combination_optimization sub-directory.
_combo_dir = DATA_DIR / "combination_optimization"
all_runs = pd.read_csv(DATA_DIR / "all_runs_comparison.csv")
final_best = pd.read_csv(DATA_DIR / "final_best_results.csv")
combo_results = pd.read_csv(_combo_dir / "all_combination_results.csv")
method_comparison = pd.read_csv(_combo_dir / "method_comparison.csv")

# Short load report: row counts plus the per-structure best-result summary
_summary_columns = [
    "structure",
    "best_dice",
    "source_run",
    "threshold_used",
    "tta_applied",
    "ensemble_applied",
]
print("Data loaded successfully")
print(f"All runs: {len(all_runs)} rows")
print(f"Combination results: {len(combo_results)} rows")
print("Final best results:")
print(final_best[_summary_columns].to_string(index=False))


Data loaded successfully
All runs: 20 rows
Combination results: 324 rows
Final best results:
   structure  best_dice       source_run  threshold_used  tta_applied  ensemble_applied
    vesicles   0.152006 finetune_retrain            0.50        False             False
mitochondria   0.869300     ensemble_max            0.35         True              True
    membrane   0.892483         finetune            0.40        False             False


In [2]:
# Figure 1: Qualitative comparison — EM image with GT vs predicted overlays
# Layout: one row per structure; columns are raw EM, ground-truth overlay,
# and the pre-rendered prediction overlay.
fig, axes = plt.subplots(2, 3, figsize=(7.5, 5.2))

structures = [
    ("mito", "Mitochondria", MITO_COLOR),
    ("membrane", "Membrane", MEMBRANE_COLOR),
]

for row_idx, (struct_key, struct_name, color) in enumerate(structures):
    # Load raw image and GT label
    raw = tifffile.imread(DATA_DIR / "images" / f"{struct_key}_image_section_002.tif")
    gt = tifffile.imread(DATA_DIR / "images" / f"{struct_key}_label_section_002.tif")

    # Min-max normalize raw to 0-1 for display. Guard on the actual divisor
    # (max - min): a constant image would otherwise produce 0/0 = NaN.
    raw_norm = raw.astype(np.float32)
    raw_min, raw_max = raw_norm.min(), raw_norm.max()
    if raw_max > raw_min:
        raw_norm = (raw_norm - raw_min) / (raw_max - raw_min)
    else:
        raw_norm = np.zeros_like(raw_norm)

    # Create GT binary mask (any non-zero label counts as foreground)
    gt_mask = (gt > 0).astype(np.float32)

    # Panel A: Raw EM image
    axes[row_idx, 0].imshow(raw_norm, cmap="gray", vmin=0, vmax=1)
    axes[row_idx, 0].set_title("EM Image" if row_idx == 0 else "")
    axes[row_idx, 0].set_ylabel(struct_name, fontsize=11, fontweight="bold")

    # Panel B: GT overlay — 50/50 blend of the grayscale image and the
    # structure color inside the mask, untouched grayscale outside it.
    rgb_gt = np.stack([raw_norm] * 3, axis=-1)
    tint = np.asarray(color, dtype=np.float32)
    rgb_gt = np.where(gt_mask[..., np.newaxis] > 0, rgb_gt * 0.5 + tint * 0.5, rgb_gt)
    axes[row_idx, 1].imshow(rgb_gt)
    axes[row_idx, 1].set_title("Ground Truth" if row_idx == 0 else "")

    # Panel C: Best prediction overlay (load eval overlay). The context
    # manager closes the underlying file once the pixels are copied out.
    with Image.open(DATA_DIR / "images" / f"{struct_key}_eval_overlay.png") as overlay_file:
        eval_img = np.array(overlay_file)
    axes[row_idx, 2].imshow(eval_img)
    axes[row_idx, 2].set_title("Prediction" if row_idx == 0 else "")

    # Images carry no meaningful axis ticks; hide them on every panel in the row
    for panel in axes[row_idx]:
        panel.set_xticks([])
        panel.set_yticks([])

plt.tight_layout(w_pad=0.5, h_pad=0.8)

# Save
fig.savefig(FIG_DIR / "fig1_qualitative.png", dpi=300)
fig.savefig(FIG_DIR / "fig1_qualitative.tiff", dpi=300)
plt.close(fig)
print("Figure 1 saved: qualitative comparison")


Figure 1 saved: qualitative comparison


In [3]:
# Figure 2: Method comparison bar chart — 6 methods x 2 structures
# Grouped bars: one group per method, one bar per structure (mito + membrane only).
# Method order: unadapted, domain_adapted, fine-tuned (dice), fine-tuned (focal),
# DA+FT, micro-SAM zero-shot.

method_labels = [
    "Unadapted",
    "Domain\nAdapted",
    "Fine-tuned\n(Dice loss)",
    "Fine-tuned\n(Focal loss)",
    "DA + FT",
    "micro-SAM\nZero-shot",
]

# Dice values transcribed by hand from all_runs_comparison.csv, in the same
# order as method_labels.
mito_scores = [
    0.000,   # unadapted (run 9)
    0.000,   # domain_adapted (run 19)
    0.155,   # fine-tuned dice (run 6)
    0.836,   # fine-tuned focal (run 8)
    0.844,   # DA+FT (run 21)
    0.250,   # micro-SAM zero-shot (run 13)
]

membrane_scores = [
    0.000,   # unadapted
    0.000,   # domain_adapted
    0.361,   # fine-tuned dice
    0.870,   # fine-tuned focal
    0.858,   # DA+FT
    0.293,   # micro-SAM zero-shot
]

x = np.arange(len(method_labels))
width = 0.35

fig, ax = plt.subplots(figsize=(7, 4))

# Mitochondria bars sit left of each group centre, membrane bars right of it
bars1 = ax.bar(x - width/2, mito_scores, width, label="Mitochondria",
               color=MITO_COLOR, edgecolor="black", linewidth=0.5, alpha=0.85)
bars2 = ax.bar(x + width/2, membrane_scores, width, label="Membrane",
               color=MEMBRANE_COLOR, edgecolor="black", linewidth=0.5, alpha=0.85)

# Print the value above every bar tall enough to be visible; near-zero bars
# are left unlabeled.
for bar in (*bars1, *bars2):
    height = bar.get_height()
    if height <= 0.02:
        continue
    ax.text(bar.get_x() + bar.get_width()/2., height + 0.015,
            f"{height:.3f}", ha="center", va="bottom", fontsize=7)

# Axis cosmetics
ax.set_ylabel("Dice Coefficient")
ax.set_xticks(x)
ax.set_xticklabels(method_labels, fontsize=8)
ax.set_ylim(0, 1.05)
ax.legend(loc="upper left", framealpha=0.9)
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.axhline(y=0, color="black", linewidth=0.5)

plt.tight_layout()
for extension in ("png", "tiff"):
    fig.savefig(FIG_DIR / f"fig2_method_comparison.{extension}", dpi=300)
plt.close(fig)
print("Figure 2 saved: method comparison bar chart")

Figure 2 saved: method comparison bar chart


In [4]:
# Figure 3: Threshold sweep curves — Dice vs threshold for best methods
# One panel per structure; one line per method that has any non-zero Dice.
fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))

structures_cfg = [
    ("mitochondria", "Mitochondria", axes[0]),
    ("membrane", "Membrane", axes[1]),
]

# Legend label and line styling for each value of the "method" column
method_styles = {
    "finetune": {"label": "Fine-tuned (focal)", "color": "#2196F3", "ls": "-", "marker": "o"},
    "da_then_finetune": {"label": "DA + FT", "color": "#FF9800", "ls": "--", "marker": "s"},
    "domain_adapted": {"label": "Domain adapted", "color": "#9E9E9E", "ls": ":", "marker": "^"},
}

# Keep only the rows labeled as the phase-A threshold sweep
is_threshold_phase = combo_results["phase"] == "A_threshold"
thresh_data = combo_results[is_threshold_phase]

for struct, struct_label, ax in structures_cfg:
    struct_data = thresh_data[thresh_data["structure"] == struct]

    for method_key, style in method_styles.items():
        mdata = struct_data[struct_data["method"] == method_key].sort_values("threshold")
        # Skip methods with no rows or whose Dice never rises above zero
        if len(mdata) == 0 or not (mdata["dice"].max() > 0):
            continue
        ax.plot(
            mdata["threshold"],
            mdata["dice"],
            label=style["label"],
            color=style["color"],
            linestyle=style["ls"],
            marker=style["marker"],
            markersize=4,
            linewidth=1.5,
        )

    # Shared panel formatting
    ax.set(
        xlabel="Threshold",
        ylabel="Dice Coefficient",
        xlim=(0.05, 0.75),
        ylim=(0, 1.0),
    )
    ax.set_title(struct_label, fontweight="bold")
    ax.legend(fontsize=7, loc="lower left")
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
for extension in ("png", "tiff"):
    fig.savefig(FIG_DIR / f"fig3_threshold_sweep.{extension}", dpi=300)
plt.close(fig)
print("Figure 3 saved: threshold sweep curves")


Figure 3 saved: threshold sweep curves


In [5]:
# Figure 4: TTA and ensemble optimization — improvement progression per structure
# One panel per structure; three bars showing the best Dice found in each
# optimization phase (threshold sweep, TTA, ensemble).


def _best_row(frame):
    """Return the row of `frame` with the highest Dice, or None if there is none.

    Rows whose "dice" is NaN are ignored, so an empty or all-NaN selection
    yields None instead of failing inside idxmax().
    """
    scored = frame.dropna(subset=["dice"])
    if len(scored) == 0:
        return None
    return scored.loc[scored["dice"].idxmax()]


fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))

structures_cfg = [
    ("mitochondria", "Mitochondria", axes[0]),
    ("membrane", "Membrane", axes[1]),
]

phases = ["Threshold\nOptimization", "+ TTA", "+ Ensemble"]

# For each structure, show best Dice at each optimization phase
for struct, struct_label, ax in structures_cfg:
    struct_data = combo_results[combo_results["structure"] == struct]
    not_domain_adapted = struct_data["method"] != "domain_adapted"

    # Phase A: best threshold (no TTA, no ensemble)
    phase_a = struct_data[(struct_data["phase"] == "A_threshold") & not_domain_adapted]
    best_a = _best_row(phase_a)

    # Phase B: best TTA
    phase_b = struct_data[(struct_data["phase"] == "B_tta") & not_domain_adapted]
    best_b = _best_row(phase_b)

    # Phase C: best ensemble. na=False treats a missing phase as "no match";
    # without it a NaN in the column makes the mask unusable for indexing.
    phase_c = struct_data[struct_data["phase"].str.startswith("C_ensemble", na=False)]
    best_c = _best_row(phase_c)

    # A phase with no usable rows is drawn as 0 (and left un-annotated below)
    dices = [best["dice"] if best is not None else 0
             for best in (best_a, best_b, best_c)]

    color = MITO_COLOR if struct == "mitochondria" else MEMBRANE_COLOR
    bars = ax.bar(phases, dices, color=color, edgecolor="black", linewidth=0.5, alpha=0.85, width=0.6)

    # Annotate bars
    for bar, dice in zip(bars, dices):
        if dice > 0:
            ax.text(bar.get_x() + bar.get_width()/2., dice + 0.01,
                    f"{dice:.3f}", ha="center", va="bottom", fontsize=8, fontweight="bold")

    ax.set_ylabel("Dice Coefficient")
    ax.set_title(struct_label, fontweight="bold")
    # NOTE: the y-axis is deliberately truncated (starts at 0.8) so the small
    # differences between phases are visible; bars below 0.8 will not show.
    if struct == "mitochondria":
        ax.set_ylim(0.8, 0.92)
    else:
        ax.set_ylim(0.8, 0.95)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

plt.tight_layout()
fig.savefig(FIG_DIR / "fig4_optimization_progression.png", dpi=300)
fig.savefig(FIG_DIR / "fig4_optimization_progression.tiff", dpi=300)
plt.close(fig)
print("Figure 4 saved: TTA/ensemble optimization progression")

Figure 4 saved: TTA/ensemble optimization progression


In [6]:
# Verify all figures exist
# Reports the on-disk size of the PNG and TIFF export of each figure;
# os.path.getsize raises if any of the files is missing.
import os

figure_stems = [
    "fig1_qualitative",
    "fig2_method_comparison",
    "fig3_threshold_sweep",
    "fig4_optimization_progression",
]
for f in figure_stems:
    png = FIG_DIR / f"{f}.png"
    tiff = FIG_DIR / f"{f}.tiff"
    png_kb = os.path.getsize(png) / 1024
    tiff_kb = os.path.getsize(tiff) / 1024
    print(f"{f}: PNG={png_kb:.0f}KB, TIFF={tiff_kb:.0f}KB")
print("\nAll figures generated successfully!")

fig1_qualitative: PNG=2716KB, TIFF=12510KB
fig2_method_comparison: PNG=86KB, TIFF=9085KB
fig3_threshold_sweep: PNG=95KB, TIFF=7889KB
fig4_optimization_progression: PNG=97KB, TIFF=7885KB

All figures generated successfully!
