In [1]:
# Cell 1: Import libraries
import numpy as np
from cellpose import models
import skimage
import nd2
import xarray

# figures
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar

# plotting and processing
from tqdm.notebook import tqdm
import pandas as pd
import seaborn as sns
import os
from pathlib import Path
import warnings
import openpyxl



Welcome to CellposeSAM, cellpose v
cellpose version: 	4.0.5 
platform:       	win32 
python version: 	3.11.13 
torch version:  	2.7.1+cu128! The neural network component of
CPSAM is much larger than in previous versions and CPU excution is slow. 
We encourage users to use GPU/MPS if available. 




In [None]:
# Cell 2: Configuration and setup
# EXPERIMENT CONFIGURATION
# All paths are derived from the current working directory: the experiment
# folder is its parent, so the notebook must be started from a subfolder of
# the experiment.

experiment_folder = Path.cwd().parent 
raw_data_folder = experiment_folder / 'raw_data'              # searched for *.nd2 files in Cell 5
figures_folder = experiment_folder / 'figures'                # QC and summary figures are written here
processed_data_folder = experiment_folder / 'processed_data'  # CSV / Excel results are written here

os.environ["CELLPOSE_LOCAL_MODELS_PATH"] = str(experiment_folder.parent / '_pipeline_assets/cellpose_models/') # location of cellpose models
# Folder in which get_flatfield_files() looks for flatfield_BF/FAM/TAMRA.nd2.
# It currently points at the figures folder. (The spelling "flatfiled" is kept
# because other cells reference this exact name.)
flatfiled_map_folder = figures_folder

# Create output folders if they don't exist
# (no parents=True: the experiment folder itself must already exist)
figures_folder.mkdir(exist_ok=True)
processed_data_folder.mkdir(exist_ok=True)

# ANALYSIS PARAMETERS
# Multiplied by the ROI area in calculate_roi_properties() to obtain a volume.
thickness_um = 20

# Channel order
# Values are integer positions passed to array.isel(C=...).
# NOTE(review): 'bags' is not referenced by any cell visible in this notebook.
channels_params = {
    'brightfield': 0,
    'spots': 1,
    'bags': 2
}

# Channel used for spot acquisition
# get_flatfield_files() uses the first entry whose value is True to choose the
# flatfield file for the spot channel.
spot_channel_params = {
    'FAM': False,
    'TAMRA': True
}

# Segmentation parameters
# Read by cellpose_bag(): 'model_name' and 'gpu' go to models.CellposeModel,
# 'diameter' goes to model.eval.
segmentation_params = {
    'diameter': 676,
    'model_name': 'cpsam_20x_downsampeled_20250630',
    'gpu': True
}

# Spot detection parameters
# Passed unchanged to skimage.feature.peak_local_max in detect_spots().
spot_detection_params = {
    'min_distance': 3,
    'threshold_abs': 800
}

# Background correction
# NOTE(review): not referenced by any cell visible here;
# gaussian_background_correction() is defined but never called. Presumably
# intended as its sigma values - confirm before relying on them.
background_sigma_params = {
    "segmentation_sigma": 40,
    "spot_sigma": 100
}

print(f"Experiment folder: {experiment_folder}")
print(f"Gel thickness: {thickness_um} μm")

In [None]:
# Cell 3: Define analysis functions
def gaussian_background_correction(image, sigma):
    """Subtract a Gaussian-blurred copy of the image from the image itself.

    Args:
        image: Array-like exposing ``astype``; it is converted to float32
            before blurring and before subtraction.
        sigma: Forwarded to ``skimage.filters.gaussian`` as ``sigma``.

    Returns:
        float32 image minus its blurred version.
    """
    image_f32 = image.astype(np.float32)
    # The blurred image serves as the background estimate.
    return image_f32 - skimage.filters.gaussian(image_f32, sigma=sigma)

def get_flatfield_files(spot_channel_params):
    """Select the flatfield .nd2 files that match the active spot channel.

    Args:
        spot_channel_params: Mapping of channel name ('FAM' or 'TAMRA') to a
            bool telling whether that channel was used for spot acquisition.

    Returns:
        dict with two paths inside ``flatfiled_map_folder``:
        'segmentation' (always the brightfield flatfield) and
        'spot_detection' (the flatfield of the active spot channel).

    Raises:
        ValueError: If no channel is flagged as active.
        KeyError: If the active channel has no flatfield file defined here.
    """
    # Validate the selection first so a misconfiguration produces a clear
    # message instead of a bare IndexError.
    active_channels = [channel for channel, is_active in spot_channel_params.items() if is_active]
    if not active_channels:
        raise ValueError(
            "No active spot channel: set one entry of spot_channel_params to True "
            f"(got {dict(spot_channel_params)})"
        )
    if len(active_channels) > 1:
        # Keep the historical behaviour (first active entry wins) but make it visible.
        warnings.warn(
            f"Several spot channels are active {active_channels}; "
            f"using the first one ('{active_channels[0]}') for flatfield selection."
        )
    active_channel = active_channels[0]

    flatfield_files = {
        'BF': flatfiled_map_folder / 'flatfield_BF.nd2',
        'FAM': flatfiled_map_folder / 'flatfield_FAM.nd2',
        'TAMRA': flatfiled_map_folder / 'flatfield_TAMRA.nd2'
    }

    return {
        'segmentation': flatfield_files['BF'],
        'spot_detection': flatfield_files[active_channel]
    }

# Mean-normalised flatfields keyed by file path, so each flatfield .nd2 is read
# from disk once instead of once per field of view. Re-running this cell
# empties the cache (do so if a flatfield file is replaced on disk).
_FLATFIELD_CACHE = {}


def flatfield_correction(image, flatfield_image):
    """Divide an image by a mean-normalised flatfield image.

    Args:
        image: Image to correct; must be divisible element-wise by the flatfield.
        flatfield_image: Either a path to a flatfield .nd2 file (read with
            ``nd2.imread`` and cached per path) or an already loaded
            ``numpy.ndarray`` flatfield.

    Returns:
        ``image / (flatfield / mean(flatfield))``.
    """
    if isinstance(flatfield_image, np.ndarray):
        # Caller supplied pixel data directly; nothing to read or cache.
        normalised_flatfield = flatfield_image / np.mean(flatfield_image)
    else:
        cache_key = str(flatfield_image)
        if cache_key not in _FLATFIELD_CACHE:
            flatfield = nd2.imread(flatfield_image)
            _FLATFIELD_CACHE[cache_key] = flatfield / np.mean(flatfield)
        normalised_flatfield = _FLATFIELD_CACHE[cache_key]

    return image / normalised_flatfield

def max_project_xarray(array):
    """Return the element-wise maximum over the first axis of ``array.values``."""
    stack = array.values
    return stack.max(axis=0)

# Loaded Cellpose models keyed by (model name, gpu flag). Building the model is
# the expensive part, so it is done once and reused for every field of view.
# Re-running this cell empties the cache.
_CELLPOSE_MODEL_CACHE = {}


def cellpose_bag(image):
    """Segment an image with the pretrained Cellpose model from ``segmentation_params``.

    The model named by ``segmentation_params['model_name']`` is created on the
    first call (using ``segmentation_params['gpu']``) and reused afterwards.

    Args:
        image: Image passed unchanged to ``model.eval``.

    Returns:
        The first element returned by ``model.eval`` (the label masks).
    """
    cache_key = (segmentation_params['model_name'], segmentation_params['gpu'])
    model = _CELLPOSE_MODEL_CACHE.get(cache_key)
    if model is None:
        model = models.CellposeModel(
            gpu=segmentation_params['gpu'],
            pretrained_model=segmentation_params['model_name']
        )
        _CELLPOSE_MODEL_CACHE[cache_key] = model

    # Only the masks are needed; indexing avoids binding unused outputs.
    masks = model.eval(
        image,
        diameter=segmentation_params['diameter']
    )[0]
    return masks

def detect_spots(image, mask):
    """Find local intensity maxima of ``image`` restricted to one ROI mask.

    Args:
        image: Spot-channel image.
        mask: Mask of the ROI; it is multiplied element-wise with ``image``.

    Returns:
        Whatever ``skimage.feature.peak_local_max`` returns for the masked
        image with the settings from ``spot_detection_params``.
    """
    # Multiplying by the mask zeroes every pixel where the mask is False/0.
    roi_only = image * mask
    return skimage.feature.peak_local_max(
        roi_only,
        threshold_abs=spot_detection_params['threshold_abs'],
        min_distance=spot_detection_params['min_distance'],
    )

def calculate_roi_properties(mask, pixel_size_um):
    """Return pixel count, area and volume of one ROI.

    Args:
        mask: ROI mask; its elements are summed to obtain the pixel count.
        pixel_size_um: Edge length of one pixel; squared to get the pixel area.

    Returns:
        Tuple ``(area_pixels, area_um2, volume_um3)`` where the volume is the
        area multiplied by the module-level ``thickness_um``.
    """
    n_pixels = np.sum(mask)
    roi_area = n_pixels * (pixel_size_um ** 2)
    return n_pixels, roi_area, roi_area * thickness_um

print("Analysis functions defined ✓")

In [None]:
# Cell 4: Quality control figure function
def create_qc_figure(image_seg, image_spots, masks, all_coords, condition, image_num, save_path, pixel_size_um):
    """Save a 2x2 quality-control figure for one field of view.

    Panels: segmentation image, segmentation image with ROI overlay, spot
    image, spot image with detected spots marked.

    Args:
        image_seg: Image shown in the top row.
        image_spots: Image shown in the bottom row.
        masks: Integer label image; 0 is treated as background.
        all_coords: Sequence of (row, col) spot coordinates; may be empty.
        condition: Text used in the figure title.
        image_num: Image number used in the figure title.
        save_path: Destination passed to ``savefig``.
        pixel_size_um: Pixel size handed to ``ScaleBar`` with units 'um'.
    """
    # Count the labels that are actually present. Label ids can have gaps
    # (e.g. after border clearing), so masks.max() may overstate the ROI count.
    n_rois = int(np.count_nonzero(np.unique(masks)))

    fig, axes = plt.subplots(2, 2, figsize=(8, 10))
    try:
        # Original segmentation image
        axes[0, 0].imshow(image_seg, cmap='gray')
        axes[0, 0].set_title('Segmentation Channel')
        axes[0, 0].axis('off')

        # Segmentation with masks overlay
        axes[0, 1].imshow(image_seg, cmap='gray')
        if n_rois > 0:
            # Mask out label 0 so only ROIs are tinted, not the whole background.
            axes[0, 1].imshow(np.ma.masked_equal(masks, 0), alpha=0.3, cmap='tab10')
        axes[0, 1].set_title(f'Segmentation + Masks ({n_rois} ROIs)')
        axes[0, 1].axis('off')

        # Spots channel
        axes[1, 0].imshow(image_spots, cmap='twilight_shifted')
        axes[1, 0].set_title('Spots Channel')
        axes[1, 0].axis('off')

        # Spots with detections
        axes[1, 1].imshow(image_spots, cmap='twilight_shifted')
        if len(all_coords) > 0:
            all_coords_array = np.vstack(all_coords)
            # Coordinates are (row, col): column is x, row is y.
            axes[1, 1].scatter(all_coords_array[:, 1], all_coords_array[:, 0],
                               s=10, c='red', marker='x', alpha=0.8)
        axes[1, 1].set_title(f'Spots + Detections ({len(all_coords)} total)')
        axes[1, 1].axis('off')

        # Add scale bar to first subplot
        scalebar = ScaleBar(pixel_size_um, units='um', location='lower right')
        axes[0, 0].add_artist(scalebar)

        fig.suptitle(f'{condition} - Image {image_num}', fontsize=14)
        fig.tight_layout()
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure; this function runs once per field of view.
        plt.close(fig)

print("QC figure function defined ✓")

In [None]:
# Cell 5: Find and list files to process
# Assign appropriate flatfield images
flatfields = get_flatfield_files(spot_channel_params)

# Fail fast if a flatfield file is missing; otherwise every raw file would
# fail later, one by one, inside the processing loop.
missing_flatfields = [str(path) for path in flatfields.values() if not Path(path).exists()]
if missing_flatfields:
    raise FileNotFoundError(f"Flatfield file(s) not found: {', '.join(missing_flatfields)}")

# Find all .nd2 files. glob() yields them in an OS-dependent order, so sort to
# keep the processing order (and the row order of the results) reproducible.
nd2_files = sorted(raw_data_folder.glob('*.nd2'))

if not nd2_files:
    raise FileNotFoundError("No .nd2 files found in raw_data folder")

print(f"Found {len(nd2_files)} .nd2 files to process:")
for file_path in nd2_files:
    print(f"  - {file_path.name}")

# Initialize results storage
results = []

In [None]:
# Cell 6: Main processing loop
# For every .nd2 file and every field of view (P axis): flatfield-correct the
# brightfield and spot channels, segment gel bags, count spots per bag, store
# one result row per bag and save a QC figure.

# Reset the accumulator so re-running this cell does not append duplicate rows.
results = []

for file_path in nd2_files:
    condition = file_path.stem  # filename without extension
    print(f"\n{'='*50}")
    print(f"Processing {condition}...")
    print(f"{'='*50}")

    try:
        # Read image
        array = nd2.imread(file_path, xarray=True)
        print(f"Image dimensions: {array.sizes}")
        with nd2.ND2File(file_path) as nd2_file:
            pixel_size_um = nd2_file.voxel_size().x

        # Process each field of view
        for p in tqdm(range(array.sizes['P']), desc=f"Processing {condition}"):
            print(f"\n  Field of view {p+1}/{array.sizes['P']}")

            # Preprocessing
            image_segmentation = array.isel(P=p, C=channels_params['brightfield'])
            image_segmentation_max = max_project_xarray(image_segmentation)
            corrected_segmentation = flatfield_correction(image_segmentation_max, flatfields['segmentation'])

            image_spots = array.isel(P=p, C=channels_params['spots'])
            image_spots_max = max_project_xarray(image_spots)
            corrected_spots = flatfield_correction(image_spots_max, flatfields['spot_detection'])

            # Segmentation
            print("    Running segmentation...")
            masks = cellpose_bag(corrected_segmentation)
            filtered_masks = skimage.segmentation.clear_border(masks, buffer_size=25)
            # clear_border zeroes the removed labels but does not renumber the
            # rest. Relabel to 1..N so that max() really is the ROI count and
            # no empty "phantom" ROI is recorded for a removed label.
            filtered_masks, _, _ = skimage.segmentation.relabel_sequential(filtered_masks)

            num_rois = int(filtered_masks.max())
            print(f"    Found {num_rois} gel bags")

            # Spot detection and analysis
            all_coords_for_qc = []

            for mask_id in range(1, num_rois + 1):
                single_mask = filtered_masks == mask_id

                # Detect spots
                coords = detect_spots(corrected_spots, single_mask)
                spot_count = len(coords)

                # Calculate ROI properties
                area_pixels, area_um2, volume_um3 = calculate_roi_properties(single_mask, pixel_size_um)

                # Calculate densities
                spots_per_area = spot_count / area_um2 if area_um2 > 0 else 0
                spots_per_volume = spot_count / volume_um3 if volume_um3 > 0 else 0

                # Store results
                result = {
                    'Experiment': experiment_folder.name,
                    'Condition': condition,
                    'Image_Number': p + 1,
                    'ROI': mask_id,
                    'Spot_Count': spot_count,
                    'ROI_Area_pixels': area_pixels,
                    'ROI_Area_um2': area_um2,
                    'ROI_Volume_um3': volume_um3,
                    'Spots_per_Area': spots_per_area,
                    'Spots_per_Volume': spots_per_volume
                }

                results.append(result)
                all_coords_for_qc.extend(coords)

            # Create QC figure
            qc_figure_path = figures_folder / f"{condition}_image_{p+1:03d}_QC.png"
            create_qc_figure(
                corrected_segmentation, corrected_spots, filtered_masks,
                all_coords_for_qc, condition, p+1, qc_figure_path, pixel_size_um
            )

            total_spots = len(all_coords_for_qc)
            if num_rois > 0:
                print(f"    Average spots detected: {total_spots / num_rois}")
            else:
                # Without this guard a field of view with no bags raised
                # ZeroDivisionError, and the except below then skipped every
                # remaining field of view of the file.
                print("    Average spots detected: n/a (no gel bags found)")
            print(f"    QC figure saved: {qc_figure_path.name}")

    except Exception as e:
        # Best effort per file: report the failure and continue with the next file.
        print(f"❌ Error processing {condition}: {str(e)}")
        warnings.warn(f"Failed to process {condition}: {str(e)}")

print(f"\n🎉 Processing complete! Analyzed {len(results)} ROIs total.")

In [None]:
# Cell 7: Save results to files
# Writes the per-ROI table as CSV and as an Excel workbook with a second sheet
# holding per-condition summary statistics, then prints a short overview.
if results:
    df = pd.DataFrame(results)

    # Both outputs share the "<experiment>_results" base name
    csv_path, excel_path = (
        processed_data_folder / f"{experiment_folder.name}_results{suffix}"
        for suffix in ('.csv', '.xlsx')
    )

    # CSV export
    df.to_csv(csv_path, index=False)

    # Excel export: raw table plus summary sheet
    with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='All_Data', index=False)

        # Per-condition statistics, rounded to three decimals
        summary = (
            df.groupby('Condition')
            .agg({
                'Spot_Count': ['count', 'mean', 'std', 'sum'],
                'ROI_Area_um2': ['mean', 'std'],
                'Spots_per_Area': ['mean', 'std'],
                'Spots_per_Volume': ['mean', 'std']
            })
            .round(3)
        )

        # Flatten the (column, statistic) pairs into "column_statistic" names
        summary.columns = ['_'.join(parts).strip() for parts in summary.columns]
        summary.reset_index().to_excel(writer, sheet_name='Summary_by_Condition', index=False)

    print(
        f"✅ Results saved to:",
        f"   📄 CSV: {csv_path}",
        f"   📊 Excel: {excel_path}",
        sep="\n",
    )

    # Basic statistics
    print(
        f"\n📈 Quick Statistics:",
        f"   Total ROIs analyzed: {len(df)}",
        f"   Conditions: {df['Condition'].nunique()}",
        f"   Total spots detected: {df['Spot_Count'].sum()}",
        f"   Average spots per ROI: {df['Spot_Count'].mean():.1f} ± {df['Spot_Count'].std():.1f}",
        sep="\n",
    )
else:
    print("❌ No results to save")

In [None]:
# Cell 8: Preview results
# Print the first rows of the result table and a per-condition summary
if results:
    df = pd.DataFrame(results)
    print("📋 Results Preview:", df.head(5), sep="\n")

    print(f"\n📊 Summary by Condition:")
    condition_summary = (
        df.groupby('Condition')
        .agg({
            'ROI': 'count',
            'Spot_Count': ['mean', 'std'],
            'ROI_Area_um2': ['mean', 'std'],
            'Spots_per_Area': ['mean', 'std']
        })
        .round(3)
    )
    # Flatten the (column, statistic) pairs into "column_statistic" names
    condition_summary.columns = ['_'.join(parts).strip() for parts in condition_summary.columns]
    print(condition_summary)

In [None]:
# Cell 9: Create summary analysis figures
# One horizontal boxplot per measurement, grouped by condition, each annotated
# with the number of ROIs per condition.
if results:
    df = pd.DataFrame(results)

    # Calculate sample sizes for each condition
    sample_sizes = df.groupby('Condition').size()

    # Set up the plotting style
    plt.style.use('default')
    sns.set_palette("husl")

    def add_n_annotations(ax, group_col='Condition'):
        """Write 'n=<count>' next to each box of a horizontal boxplot.

        Args:
            ax: Axes holding a boxplot whose y axis is ``group_col``.
            group_col: Column of ``df`` that defines the boxes. It must be the
                grouping column, not the plotted value column.
        """
        # sort=False keeps groups in order of first appearance, which is the
        # order seaborn uses for a non-categorical grouping column.
        group_sizes = df.groupby(group_col, sort=False).size()
        for position, n in enumerate(group_sizes):
            # x is given in axes fractions and y in data units, so the label
            # sits at the right edge whatever the data range is.
            ax.text(0.98, position, f'n={n}',
                    transform=ax.get_yaxis_transform(),
                    verticalalignment='center',
                    horizontalalignment='right',
                    fontweight='bold',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

    # (value column, panel title, x label or None to keep seaborn's default)
    panels = [
        ('Spot_Count', 'Spot Count Distribution by Condition', None),
        ('ROI_Area_um2', 'ROI Area Distribution by Condition', 'ROI Area (μm²)'),
        ('Spots_per_Area', 'Spots per Area by Condition', 'Spots per μm²'),
        ('Spots_per_Volume', 'Spots per Volume by Condition', 'Spots per μm³'),
    ]

    # Figure 1: distributions by condition
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    for ax, (value_col, title, xlabel) in zip(axes.flat, panels):
        sns.boxplot(data=df, y='Condition', x=value_col, ax=ax)
        ax.set_title(title)
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        ax.set_ylabel('')
        # Annotate per condition. Passing the value column here (as before)
        # produced one label per unique measurement value.
        add_n_annotations(ax)

    plt.tight_layout()
    summary_path = figures_folder / 'Summary_Analysis.png'
    plt.savefig(summary_path, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✅ Summary figure saved: {summary_path}")

In [None]:
# Cell 10: Create correlation analysis figures
# Scatter plot of spot count against ROI area, coloured by condition
if results:
    df = pd.DataFrame(results)
    correlation_path = figures_folder / 'Correlation_Analysis.png'

    # Figure 2: single-panel correlation plot
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.scatterplot(
        data=df,
        x='ROI_Area_um2',
        y='Spot_Count',
        hue='Condition',
        ax=ax,
    )
    ax.set_title('Spot Count vs ROI Area')
    ax.set_xlabel('ROI Area (μm²)')
    ax.set_ylabel('Spot Count')

    fig.tight_layout()
    fig.savefig(correlation_path, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✅ Correlation figure saved: {correlation_path}")