# Quality control notebook for Pasteur 2025 course


---

In [None]:
#@markdown ##Play the cell to connect your Google Drive to Colab

#@markdown * Click on the URL.

#@markdown * Sign in to your Google Account.

#@markdown * Copy the authorization code.

#@markdown * Enter the authorization code.

#@markdown * Open the "Files" panel in the Colab sidebar and refresh it. Your Google Drive folder should now be available there as "gdrive".

# Attach the user's Google Drive to the Colab filesystem. The paths used by the
# QC cell further down all live under this mount point.
from google.colab import drive
drive.mount(mountpoint='/content/gdrive')

In [None]:
#@markdown ##Install dependencies

# Install the packages the QC cell needs into the current runtime.
# numpy is pinned to 1.24.3 and force-reinstalled together with numba, bypassing
# pip's cache; stardist is installed afterwards without a version pin.
# NOTE(review): presumably the numpy pin keeps numba/stardist compatible —
# confirm before changing or removing it.
!pip install -q numpy==1.24.3 numba --no-cache-dir --force-reinstall
!pip install -q stardist


# Ask the runtime to stop right after the install (as the message announces).
# NOTE(review): presumably this is done so the reinstalled numpy is loaded by a
# fresh runtime when the next cell runs — confirm.
print('Stopping runtime...')
exit()
# NOTE(review): whether this message is still displayed after exit() depends on
# how the runtime handles the exit request — verify in Colab.
print('You can run the next cell now.')

In [None]:
#@markdown ##Load dependencies

import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.io import imread
from tqdm import tqdm
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd
from tabulate import tabulate
import ipywidgets as widgets
from IPython.display import display, clear_output
from stardist.matching import matching



In [None]:
#@markdown ##Conduct QC on test data

# Define paths
# Each path carries a trailing `#@param` annotation (Colab form field), so keep
# every assignment and its annotation together on one line.
#   gt_labels        - folder containing the ground-truth label images
#   predicted_labels - folder containing the predicted label images; further
#                      down, files are paired with the ground truth by sorted
#                      filename and the names must match exactly
#   results_folder   - folder under which the "QC" output folder is created
gt_labels = '/content/gdrive/MyDrive/Pasteur_Image_analysis_course_2025_BiaPy/2_test/2-label'  #@param {type:"string"}
predicted_labels = '/content/gdrive/MyDrive/Pasteur_Image_analysis_course_2025_BiaPy/3_model/Pasteur_100/results/Pasteur_100_1/per_image_instances'  #@param {type:"string"}
results_folder = '/content/gdrive/MyDrive/Pasteur_Image_analysis_course_2025_BiaPy/3_model/Pasteur_100/results/Pasteur_100_1' #@param {type:"string"}

# Create the QC folder inside the results folder
# (the PDF of visualizations and the metrics CSV are written into it below).
QC_folder = os.path.join(results_folder, "QC")
os.makedirs(QC_folder, exist_ok=True) # Create the folder if it doesn't exist

def load_label_images_with_names(folder_path):
    """Read every image file in a folder and pair it with its filename.

    Only directory entries whose name ends (case-insensitively) in .tif,
    .tiff, .png, .jpg or .jpeg are read; all other entries are ignored.
    Sub-folders are not descended into.

    Args:
        folder_path: Directory to scan.

    Returns:
        A list of ``(filename, image)`` tuples ordered by filename, where
        ``image`` is whatever ``imread`` returns for that file. The list is
        empty when the folder holds no matching files.
    """
    image_suffixes = ('.tif', '.tiff', '.png', '.jpg', '.jpeg')

    # Collect the matching names first, then sort them so that two folders
    # with identically named files yield the same ordering.
    selected = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_suffixes):
            selected.append(entry)
    selected.sort()

    loaded = []
    for entry in selected:
        loaded.append((entry, imread(os.path.join(folder_path, entry))))
    return loaded

# Load data
# Both folders are read in sorted-filename order, so the i-th ground-truth image
# is compared with the i-th predicted image.
gt_data = load_label_images_with_names(gt_labels)
pred_data = load_label_images_with_names(predicted_labels)

# Fail early with a readable message: with no images the metrics table below
# would have no columns and the column reordering would raise a KeyError.
assert len(gt_data) > 0, f"No label images found in: {gt_labels}"
assert len(gt_data) == len(pred_data), "Mismatch in number of GT and predicted images."

# ---- Evaluate and visualize ---- #
pdf_path = os.path.join(QC_folder, "QC_visualizations.pdf")
records = []
print("\nEvaluating and saving visualizations...\n")

# The context manager finalizes and closes the PDF even when an assertion or
# matching() raises part-way through the loop, so no file handle is leaked.
with PdfPages(pdf_path) as pdf:
    for (gt_name, gt_img), (pred_name, pred_img) in tqdm(zip(gt_data, pred_data), total=len(gt_data), desc="Processing"):
        assert gt_name == pred_name, f"Mismatched files: {gt_name} vs {pred_name}"

        # Instance-level matching metrics at an IoU threshold of 0.5; the
        # result fields become the columns of the metrics table.
        res = matching(gt_img, pred_img, thresh=0.5, criterion='iou')._asdict()
        res["filename"] = gt_name

        # Calculate measured IoU for binary (foreground vs. background) masks:
        # every non-zero label counts as foreground.
        gt_mask = (gt_img > 0).astype(np.uint8) * 255
        pred_mask = (pred_img > 0).astype(np.uint8) * 255
        intersection = np.logical_and(gt_mask, pred_mask)
        union = np.logical_or(gt_mask, pred_mask)
        union_area = np.sum(union)
        # Both masks empty -> define the IoU as 0 instead of dividing by zero.
        iou_score = np.sum(intersection) / union_area if union_area > 0 else 0

        res["measured_IoU"] = iou_score  # Add measured IoU to the record
        records.append(res)

        # Visualization: one PDF page per image with three panels.
        fig = plt.figure(figsize=(15, 4))
        try:
            plt.suptitle(f'{gt_name} — IoU: {iou_score:.3f}', fontsize=14)

            plt.subplot(1, 3, 1)
            plt.imshow(gt_mask, cmap='Greens')
            plt.axis('off')
            plt.title('Ground Truth')

            plt.subplot(1, 3, 2)
            plt.imshow(pred_mask, cmap='Purples')
            plt.axis('off')
            plt.title('Prediction')

            plt.subplot(1, 3, 3)
            plt.imshow(gt_mask, cmap='Greens')
            plt.imshow(pred_mask, cmap='Purples', alpha=0.5)
            plt.axis('off')
            plt.title('Overlay')

            plt.tight_layout()

            # Save figure to PDF
            pdf.savefig(fig)
        finally:
            # Always release the figure so a failure does not leave it open.
            plt.close(fig)

# ---- Save results as CSV ---- #
df = pd.DataFrame(records)
# Put the identifying columns first, keep the remaining metrics in their
# original order.
cols = ['filename', 'measured_IoU'] + [col for col in df.columns if col not in ['filename', 'measured_IoU']]
df = df[cols]  # Reorder columns
csv_path = os.path.join(QC_folder, "QC_metrics.csv")
df.to_csv(csv_path, index=False)

# ---- Print final message ---- #
print("\n✅ Visualizations saved to:", pdf_path)
print("✅ Metrics CSV saved to:", csv_path)
print("\nFinal Quality Control Summary:\n")
print(tabulate(df, headers='keys', tablefmt='psql'))

# ---- ADD PULLDOWN MENU FOR IMAGE PREVIEW ---- #

def show_image_preview(idx):
    """Show ground truth, prediction and their overlay for one image pair.

    Args:
        idx: Position of the image pair in the module-level ``gt_data`` and
            ``pred_data`` lists.

    The label images are binarized (any non-zero label is foreground) and
    drawn side by side in a single three-panel figure titled with the
    ground-truth filename.
    """
    gt_name, gt_label_img = gt_data[idx]
    _pred_name, pred_label_img = pred_data[idx]

    # Foreground masks scaled to 0/255.
    gt_mask = (gt_label_img > 0).astype(np.uint8) * 255
    pred_mask = (pred_label_img > 0).astype(np.uint8) * 255

    fig, (ax_gt, ax_pred, ax_overlay) = plt.subplots(1, 3, figsize=(15, 4))
    fig.suptitle(f'{gt_name}', fontsize=14)

    ax_gt.imshow(gt_mask, cmap='Greens')
    ax_gt.axis('off')
    ax_gt.set_title('Ground Truth')

    ax_pred.imshow(pred_mask, cmap='Purples')
    ax_pred.axis('off')
    ax_pred.set_title('Prediction')

    # Prediction drawn semi-transparently on top of the ground truth.
    ax_overlay.imshow(gt_mask, cmap='Greens')
    ax_overlay.imshow(pred_mask, cmap='Purples', alpha=0.5)
    ax_overlay.axis('off')
    ax_overlay.set_title('Overlay')

    fig.tight_layout()
    plt.show()

# Build the image selector: every option displays a filename and carries the
# position of that image in gt_data / pred_data as its value.
image_names = [entry[0] for entry in gt_data]
dropdown = widgets.Dropdown(
    options=list(zip(image_names, range(len(image_names)))),
    value=0,
    description='Select Image:',
    style=dict(description_width='initial'),
    layout=widgets.Layout(width='50%'),
)

# Output area into which the preview figure is rendered.
output = widgets.Output()

def on_dropdown_change(change):
    """Redraw the preview when the dropdown's selected value changes.

    Args:
        change: Change notification from the widget; must provide the keys
            'type' and 'name', and 'new' for value changes. Notifications
            that are not a change of the 'value' trait are ignored.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return

    with output:
        # wait=True defers clearing until the new figure is ready.
        clear_output(wait=True)
        show_image_preview(change['new'])

# Subscribe the handler to the dropdown; the handler itself filters for
# changes of the selected value.
dropdown.observe(on_dropdown_change)

# Render the selector followed by the preview area, then draw the first image
# so the preview is not empty before the user picks anything.
display(dropdown, output)
with output:
    show_image_preview(0)