In [None]:
import openslide
from pathlib import Path

# Inspect the metadata of the first .svs slide found in the training image directory.
wsi_dir = Path("dataset/training_dataset/training_image_data")
# A default of None avoids an uncaught StopIteration when the directory
# is missing or contains no .svs files.
svs_path = next(wsi_dir.glob("*.svs"), None)

if svs_path is None:
    print(f"No .svs files found in {wsi_dir}")
else:
    print(f"Inspecting: {svs_path}")
    try:
        # The context manager closes the slide handle even if reading a
        # property fails part-way through.
        with openslide.OpenSlide(str(svs_path)) as slide:
            print(f"Levels: {slide.level_count}")
            print(f"Dimensions: {slide.dimensions}")
            print(f"Level dimensions: {slide.level_dimensions}")
            print(f"Level downsamples: {slide.level_downsamples}")
            print(f"Objective power: {slide.properties.get('openslide.objective-power')}")
            print(f"Magnification: {slide.properties.get('aperio.AppMag')}")
    except Exception as e:
        # Best-effort inspection: report the problem instead of aborting the notebook.
        print(f"Error: {e}")

# Visualization of Stage 1 Results

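This notebook visualizes the results of the Stage 1 preprocessing pipeline. For each slide listed in the Stage 1 summary report, it shows the original whole-slide image (WSI) thumbnail side by side with the Macenko-normalized image, the blue-ratio image, and the generated stroma mask.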

In [None]:
from typing import Optional

import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import openslide
import numpy as np
from pathlib import Path

# Set plot style: use matplotlib's built-in 'ggplot' style sheet for the figures in this notebook
plt.style.use('ggplot')

In [None]:
# Read the Stage 1 summary CSV when it is available; otherwise tell the user how to produce it.
report_path = Path("output/stage_one/reports/stage_one_summary.csv")
if report_path.exists():
    df = pd.read_csv(report_path)
    print(f"Loaded report with {len(df)} records.")
    display(df.head())
else:
    print(f"Report not found at {report_path}. Please run stage 1 pipeline first.")

In [None]:
def load_image(path: str | Path, is_svs_thumbnail: bool = False, thumb_size: tuple[int, int] = (1024, 1024)) -> Optional[np.ndarray]:
    """Load an image, handling SVS files by extracting a thumbnail."""
    path = Path(path)
    if not path.exists():
        print(f"Warning: File not found: {path}")
        return None
        
    if path.suffix.lower() == '.svs':
        try:
            with openslide.OpenSlide(str(path)) as slide:
                img = slide.get_thumbnail(thumb_size)
                return np.array(img)
        except Exception as e:
            print(f"Error loading SVS {path}: {e}")
            return None
    else:
        try:
            return np.array(Image.open(path))
        except Exception as e:
            print(f"Error loading image {path}: {e}")
            return None

def _draw_panel(ax, image, make_title, cmap=None):
    """Draw one panel: the image with its title, or a placeholder if it is missing."""
    if image is not None:
        ax.imshow(image, cmap=cmap)
        # The title is built lazily so row fields are only read for panels
        # whose image actually loaded.
        ax.set_title(make_title())
    else:
        ax.text(0.5, 0.5, "Image Not Found", ha='center')
    ax.axis('off')


def visualize_row(row):
    """Visualize a single row of the report as a four-panel figure.

    The panels show, left to right: the original image, the normalized
    image, the blue ratio image, and the mask. A panel whose image could
    not be loaded shows an "Image Not Found" placeholder instead.

    Args:
        row: Report record providing 'image_path', 'normalized_path',
            'blue_ratio_path' and 'mask_path'. 'blob_count' and
            'average_blob_area' are read only when the mask image loads.
    """
    orig_path = row['image_path']
    norm_path = row['normalized_path']
    blue_path = row['blue_ratio_path']
    mask_path = row['mask_path']

    # (image, colormap, title factory) for each panel, in display order.
    panels = [
        (
            load_image(orig_path, is_svs_thumbnail=True),
            None,
            lambda: f"Original (Thumbnail)\n{Path(orig_path).name}",
        ),
        (load_image(norm_path), None, lambda: "Macenko Normalized"),
        (load_image(blue_path), 'jet', lambda: "Blue Ratio"),
        (
            load_image(mask_path),
            'gray',
            lambda: f"Stroma Mask\nBlobs: {row['blob_count']}, Avg Area: {row['average_blob_area']:.1f}",
        ),
    ]

    fig, axes = plt.subplots(1, len(panels), figsize=(24, 6))
    for ax, (image, cmap, make_title) in zip(axes, panels):
        _draw_panel(ax, image, make_title, cmap)

    plt.tight_layout()
    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)


In [None]:
# Render one comparison figure for each of the first 5 report rows
if report_path.exists():
    preview_rows = df.head(5)
    for idx, record in preview_rows.iterrows():
        print(f"Visualizing Index {idx}")
        visualize_row(record)