In [21]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True) # Force remount to override existing mount

ValueError: Mountpoint must not already contain files

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from skimage import io, measure, segmentation, feature
from scipy import ndimage
from collections import Counter
import tifffile
from pathlib import Path
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Google Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-A-2"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-A-2"

# Create the output directory only when Google Drive is actually mounted.
# Calling os.makedirs() while /content/drive is NOT mounted creates a local
# directory tree underneath the mountpoint, and the next drive.mount() call
# then fails with "ValueError: Mountpoint must not already contain files".
if os.path.ismount("/content/drive"):
    os.makedirs(output_dir, exist_ok=True)
else:
    print("Warning: Google Drive is not mounted at /content/drive; "
          f"not creating {output_dir}")

# Define function to extract sample ID from filenames
def extract_sample_id(filename):
    """Return the part of a mask filename that precedes its component token.

    The name is split on underscores and scanned from left to right. At the
    first token that is exactly 'Nuclei', 'Golgi', 'membrane' or 'cell'
    (case-sensitive), everything before it is re-joined with underscores and
    returned. For example
    'denoised_0Pa_A1_20dec21_40x_L2RA_FlatA_seq007_contrast_Nuclei_mask.tif'
    gives 'denoised_0Pa_A1_20dec21_40x_L2RA_FlatA_seq007_contrast'.

    Returns None when no token matches.
    """
    known_components = ('Nuclei', 'Golgi', 'membrane', 'cell')
    tokens = str(filename).split('_')

    # Index of the first component token, or None when there is none.
    position = next(
        (idx for idx, token in enumerate(tokens) if token in known_components),
        None,
    )
    if position is None:
        return None
    return '_'.join(tokens[:position])

# Function to find matching files across different folders
def find_matching_files(base_dir):
    """Index the '.tif' masks below ``base_dir`` by sample ID and component.

    The sub-folders 'Nuclei', 'Membrane_Adjusted', 'Golgi' and 'Cell' are
    scanned in that order; a missing folder only produces a warning. Every
    file whose name ends in '.tif' and for which extract_sample_id() returns
    a non-empty ID is recorded under that ID.

    Returns:
    (file_dict, complete_samples) where file_dict maps
    sample ID -> {component key -> full file path} with the component keys
    'nuclei', 'membrane', 'golgi' and 'cell', and complete_samples lists the
    sample IDs that have at least a 'nuclei' and a 'cell' entry.
    """
    # Folder name -> key used in the per-sample dictionary. The adjusted
    # membrane folder is stored under the plain 'membrane' key.
    folder_keys = {
        'Nuclei': 'nuclei',
        'Membrane_Adjusted': 'membrane',
        'Golgi': 'golgi',
        'Cell': 'cell',
    }

    file_dict = {}
    for folder, key in folder_keys.items():
        folder_path = os.path.join(base_dir, folder)
        if not os.path.exists(folder_path):
            print(f"Warning: {folder_path} does not exist.")
            continue

        tiff_names = [name for name in os.listdir(folder_path) if name.endswith('.tif')]
        print(f"Found {len(tiff_names)} TIFF files in {folder} folder.")

        for name in tiff_names:
            sample_id = extract_sample_id(name)
            if not sample_id:
                # No recognisable component token (or nothing before it).
                continue
            file_dict.setdefault(sample_id, {})[key] = os.path.join(folder_path, name)

    # A sample is usable as soon as both nuclei and cell masks are present.
    complete_samples = [
        sample_id
        for sample_id, found in file_dict.items()
        if 'nuclei' in found and 'cell' in found
    ]

    print(f"Found {len(complete_samples)} complete samples with at least nuclei and cell data.")

    return file_dict, complete_samples

# Function to load images for a specific sample
def load_sample_images(sample_id, file_dict):
    """Load every component image registered for ``sample_id``.

    Parameters:
    sample_id: Key into file_dict
    file_dict: Dictionary mapping sample IDs to {component: file path}

    Returns:
    Dictionary mapping component name to the loaded image array. Components
    whose file is missing or fails to load are reported on stdout and left
    out of the result.
    """
    loaded = {}
    for name, path in file_dict[sample_id].items():
        if not os.path.exists(path):
            print(f"Warning: File not found - {path}")
            continue

        try:
            image = io.imread(path)

            # Boolean masks become 0/1 integers
            if image.dtype == bool:
                image = image.astype(np.uint8)

            # A cell/nuclei image whose maximum is at most 1 is treated as a
            # binary mask and split into connected, individually labelled regions
            needs_labeling = name in ('cell', 'nuclei') and np.max(image) <= 1
            if needs_labeling:
                print(f"Converting binary {name} image to labeled image")
                image, region_count = ndimage.label(image)
                print(f"Found {region_count} {name} regions")

            loaded[name] = image
            print(f"Loaded {name} image: shape {image.shape}, dtype: {image.dtype}, value range [{np.min(image)}, {np.max(image)}]")
        except Exception as err:
            # Best effort: report the failure and carry on with the other components
            print(f"Error loading {path}: {str(err)}")

    return loaded

# Function to detect senescent cells
def detect_senescent_cells(cell_image, nucleus_image, expected_senescent_fraction=0.3,
                     size_threshold_factor=1.5, holes_ratio_quantile=0.7):
    """
    Detect senescent cells based on multiple features and adjust masks accordingly.

    Every labelled cell of at least 100 pixels is measured and receives a
    senescence score between 0 and 3, one point for each indicator:
    area above mean + size_threshold_factor * std, more than one overlapping
    nucleus, and a holes-to-cell ratio above the holes_ratio_quantile quantile.
    A cell is classified as senescent when its score reaches the
    (1 - expected_senescent_fraction) quantile of all scores AND is at least 1,
    so a cell that shows no indicator at all is never classified as senescent.
    Holes inside senescent cells are filled in the adjusted label image.

    Parameters:
    cell_image: Label image of cells
    nucleus_image: Label image of nuclei
    expected_senescent_fraction: Expected fraction of senescent cells (default 0.3)
    size_threshold_factor: Factor multiplied by std dev to set size threshold (default 1.5)
    holes_ratio_quantile: Quantile threshold for holes-to-cell ratio (default 0.7)

    Returns:
    Dictionary with detection results and adjusted masks: 'cell_metrics'
    (DataFrame, one row per analysed cell), 'original_cell_image',
    'adjusted_cell_image', 'senescent_count', 'total_cells' and
    'senescent_fraction'
    """
    # Ensure we have integer type images
    cell_image = cell_image.astype(np.int32)
    nucleus_image = nucleus_image.astype(np.int32)

    cell_metrics = []

    # Process each cell
    for cell_prop in measure.regionprops(cell_image):
        area = cell_prop.area

        # Skip very small objects (likely artifacts)
        if area < 100:
            continue

        cell_id = cell_prop.label

        # Create binary mask for current cell
        cell_mask = (cell_image == cell_id)

        # Nucleus labels of the pixels that belong to this cell. Every positive
        # value is a nucleus pixel inside the cell, so the count of positive
        # values is the nuclear area and the distinct positive values are the
        # nuclei overlapping the cell.
        nucleus_labels = nucleus_image[cell_mask]
        nucleus_pixels = nucleus_labels[nucleus_labels > 0]
        nuclei_count = len(np.unique(nucleus_pixels))
        nuclear_area = int(nucleus_pixels.size)
        nuclear_cytoplasmic_ratio = nuclear_area / area  # area >= 100 here

        # Holes are the pixels that filling adds to the cell mask
        holes_mask = ndimage.binary_fill_holes(cell_mask) & ~cell_mask
        _, num_holes = ndimage.label(holes_mask)
        total_hole_area = int(np.count_nonzero(holes_mask))
        holes_to_cell_ratio = total_hole_area / area

        # Extract shape metrics
        perimeter = cell_prop.perimeter or 0
        circularity = (4 * np.pi * area) / (perimeter * perimeter) if perimeter > 0 else 0

        cell_metrics.append({
            'cell_id': cell_id,
            'area': area,
            'perimeter': perimeter,
            'circularity': circularity,
            'solidity': cell_prop.solidity,
            'nuclei_count': nuclei_count,
            'nuclear_area': nuclear_area,
            'nuclear_cytoplasmic_ratio': nuclear_cytoplasmic_ratio,
            'num_holes': num_holes,
            'total_hole_area': total_hole_area,
            'holes_to_cell_ratio': holes_to_cell_ratio
        })

    # Handle empty metrics case
    if not cell_metrics:
        print("No valid cells found for analysis")
        return {
            'cell_metrics': pd.DataFrame(),
            'original_cell_image': cell_image,
            'adjusted_cell_image': cell_image.copy(),
            'senescent_count': 0,
            'total_cells': 0,
            'senescent_fraction': 0
        }

    # Convert to DataFrame for easier analysis
    metrics_df = pd.DataFrame(cell_metrics)

    # 1. Cell size threshold (senescent cells are larger)
    size_mean = metrics_df['area'].mean()
    size_std = metrics_df['area'].std()
    size_threshold = size_mean + size_threshold_factor * size_std

    # 2. Multinucleation
    multinucleated = metrics_df['nuclei_count'] > 1

    # 3. Holes-to-cell ratio threshold (senescent cells may have more holes)
    holes_ratio_threshold = metrics_df['holes_to_cell_ratio'].quantile(holes_ratio_quantile)

    # Log thresholds for debugging
    print(f"Size threshold: {size_threshold:.1f} pixels (mean {size_mean:.1f} + {size_threshold_factor} * std {size_std:.1f})")
    print(f"Holes ratio threshold: {holes_ratio_threshold:.4f} (quantile {holes_ratio_quantile})")

    # One point per senescence indicator
    metrics_df['senescence_score'] = 0
    metrics_df.loc[metrics_df['area'] > size_threshold, 'senescence_score'] += 1
    metrics_df.loc[multinucleated, 'senescence_score'] += 1
    metrics_df.loc[metrics_df['holes_to_cell_ratio'] > holes_ratio_threshold, 'senescence_score'] += 1

    # Dynamically set the score threshold to approach the expected senescent fraction
    scores = metrics_df['senescence_score']
    if scores.nunique() == 1:
        # All cells share one score: all are senescent if it is > 0, none if it is 0
        score_threshold = scores.iloc[0]
    else:
        score_threshold = np.quantile(scores, 1 - expected_senescent_fraction)

    # Scores are discrete, so the quantile is 0 whenever most cells show no
    # indicator; "score >= 0" would then flag every cell. Require at least one
    # indicator for a cell to count as senescent.
    score_threshold = max(score_threshold, 1)

    # Classify cells
    metrics_df['is_senescent'] = scores >= score_threshold

    # Create adjusted cell masks by filling holes in senescent cells
    adjusted_cell_image = cell_image.copy()

    for cell_id in metrics_df.loc[metrics_df['is_senescent'], 'cell_id']:
        cell_id = int(cell_id)
        # The filled mask contains the original cell, so writing it is enough.
        # NOTE(review): any other label enclosed by this cell is overwritten
        # by the fill - confirm that absorbing enclosed objects is intended.
        filled_mask = ndimage.binary_fill_holes(cell_image == cell_id)
        adjusted_cell_image[filled_mask] = cell_id

    # Calculate statistics
    senescent_count = metrics_df['is_senescent'].sum()
    total_cells = len(metrics_df)
    senescent_fraction = senescent_count / total_cells if total_cells > 0 else 0

    print(f"Detected {senescent_count} senescent cells out of {total_cells} total cells ({senescent_fraction:.2%})")

    # Return results
    return {
        'cell_metrics': metrics_df,
        'original_cell_image': cell_image,
        'adjusted_cell_image': adjusted_cell_image,
        'senescent_count': senescent_count,
        'total_cells': total_cells,
        'senescent_fraction': senescent_fraction
    }

# Function to visualize senescence detection results
def visualize_senescence_detection(results, cell_image, nucleus_image, output_path=None):
    """
    Visualize the senescence detection results in a 2x2 figure.

    Panels: cell and nucleus boundaries, senescent vs normal classification,
    original vs adjusted cell boundaries, and a box plot of cell area per
    class. The figure is shown and then closed so that repeated calls do not
    accumulate open figures.

    Parameters:
    results: Results from detect_senescent_cells
    cell_image: Original cell label image
    nucleus_image: Original nucleus label image
    output_path: Path to save the visualization (not saved when None)

    Returns:
    None
    """
    # Check if we have valid results
    if results['total_cells'] == 0:
        print("No valid cells to visualize")
        return

    cell_metrics = results['cell_metrics']
    is_senescent = cell_metrics['is_senescent']
    senescent_metrics = cell_metrics[is_senescent]
    normal_metrics = cell_metrics[~is_senescent]

    fig, axes = plt.subplots(2, 2, figsize=(16, 14))

    # 1. Original cell image with nuclei overlay
    cell_boundary = segmentation.find_boundaries(cell_image)
    nucleus_boundary = segmentation.find_boundaries(nucleus_image)

    overlay = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    overlay[cell_boundary] = (0, 255, 0)      # Cell boundaries in green
    overlay[nucleus_boundary] = (0, 0, 255)   # Nucleus boundaries in blue

    axes[0, 0].imshow(overlay)
    axes[0, 0].set_title("Original Cells (green) and Nuclei (blue)")
    axes[0, 0].axis('off')

    # 2. Senescent vs normal cells. One membership test per class replaces a
    # full-image comparison per cell; cells absent from the metrics stay black.
    senescent_mask = np.isin(cell_image, senescent_metrics['cell_id'].to_numpy())
    normal_mask = np.isin(cell_image, normal_metrics['cell_id'].to_numpy())

    classification = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    classification[senescent_mask] = (255, 0, 0)  # Senescent cells in red
    classification[normal_mask] = (0, 255, 0)     # Normal cells in green

    axes[0, 1].imshow(classification)
    axes[0, 1].set_title(f"Senescent Cells (red): {results['senescent_fraction']:.1%}")
    axes[0, 1].axis('off')

    # 3. Adjusted cell masks (adjusted boundaries are drawn over the original ones)
    adjusted_boundary = segmentation.find_boundaries(results['adjusted_cell_image'])

    adjusted_overlay = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    adjusted_overlay[cell_boundary] = (0, 255, 0)        # Original boundaries in green
    adjusted_overlay[adjusted_boundary] = (255, 255, 0)  # Adjusted boundaries in yellow

    axes[1, 0].imshow(adjusted_overlay)
    axes[1, 0].set_title("Original (green) vs Adjusted (yellow) Cell Boundaries")
    axes[1, 0].axis('off')

    # 4. Metrics visualization for one key metric
    metric = 'area'
    box_ax = axes[1, 1]

    if len(senescent_metrics) > 0 and len(normal_metrics) > 0:
        box_ax.boxplot([normal_metrics[metric], senescent_metrics[metric]])
        # Tick labels are set on the axis because the boxplot 'labels' keyword
        # is deprecated since Matplotlib 3.9 (renamed to 'tick_labels').
        box_ax.set_xticklabels(['Normal', 'Senescent'])
        box_ax.set_ylabel(f'Cell {metric}')
        box_ax.set_title(f'Distribution of {metric} by Cell Type')
    else:
        box_ax.text(0.5, 0.5, "Not enough data for box plot",
                    ha='center', va='center', transform=box_ax.transAxes)

    # Add metrics summary as text
    if len(senescent_metrics) > 0:
        sen_text = (f"Senescent cells (n={len(senescent_metrics)}):\n" +
                   f"Mean area: {senescent_metrics['area'].mean():.1f}\n" +
                   f"Mean nuclei count: {senescent_metrics['nuclei_count'].mean():.1f}")
    else:
        sen_text = "No senescent cells detected"

    if len(normal_metrics) > 0:
        norm_text = (f"Normal cells (n={len(normal_metrics)}):\n" +
                    f"Mean area: {normal_metrics['area'].mean():.1f}\n" +
                    f"Mean nuclei count: {normal_metrics['nuclei_count'].mean():.1f}")
    else:
        norm_text = "No normal cells detected"

    fig.text(0.02, 0.02, sen_text, fontsize=10)
    fig.text(0.52, 0.02, norm_text, fontsize=10)

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Visualization saved to {output_path}")

    plt.show()
    # Release the figure; this function runs once per sample.
    plt.close(fig)

# Function to create additional visualizations for multi-nucleated cells
def visualize_multinucleated_cells(results, cell_image, nucleus_image, output_path=None):
    """
    Create visualization specifically highlighting multinucleated cells.

    Left panel: cells coloured by nuclei count (gray 0, blue 1, green 2,
    red 3 or more) with white nucleus boundaries. Right panel: bar chart of
    how many cells fall into each class. The figure is shown and then closed
    so that repeated calls do not accumulate open figures.

    Parameters:
    results: Results from detect_senescent_cells
    cell_image: Original cell label image
    nucleus_image: Original nucleus label image
    output_path: Path to save the visualization (not saved when None)

    Returns:
    None
    """
    if results['total_cells'] == 0:
        print("No valid cells to visualize")
        return

    cell_metrics = results['cell_metrics']
    cell_ids = cell_metrics['cell_id'].to_numpy()
    # Nuclei count per cell, capped at 3 so that "3 or more" is one class
    capped_counts = np.minimum(cell_metrics['nuclei_count'].to_numpy(), 3)

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    # Colour per (capped) nuclei count
    color_map = {
        0: [100, 100, 100],  # Gray
        1: [0, 0, 255],      # Blue
        2: [0, 255, 0],      # Green
        3: [255, 0, 0]       # Red (3 or more)
    }

    # Paint all cells of one class with a single membership test instead of
    # one full-image comparison per cell; cells absent from the metrics stay black.
    multi_colored = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    for count_class, color in color_map.items():
        class_ids = cell_ids[capped_counts == count_class]
        if class_ids.size:
            multi_colored[np.isin(cell_image, class_ids)] = color

    # Draw nuclei boundaries
    nucleus_boundary = segmentation.find_boundaries(nucleus_image)
    multi_colored[nucleus_boundary] = [255, 255, 255]  # White nucleus boundaries

    axes[0].imshow(multi_colored)
    axes[0].set_title("Cell Nuclei Count\nGray: 0, Blue: 1, Green: 2, Red: 3+")
    axes[0].axis('off')

    # Number of cells with 0, 1, 2 and 3+ nuclei (always four entries)
    counts = [int(np.count_nonzero(capped_counts == count_class)) for count_class in range(4)]

    bars = axes[1].bar(['0', '1', '2', '3+'], counts, color=['gray', 'blue', 'green', 'red'])

    # Add count labels on bars
    for bar in bars:
        height = bar.get_height()
        axes[1].text(bar.get_x() + bar.get_width()/2., height + 0.1,
                    f'{int(height)}', ha='center', va='bottom')

    axes[1].set_xlabel('Number of Nuclei')
    axes[1].set_ylabel('Number of Cells')
    axes[1].set_title('Distribution of Nuclei Count')

    plt.tight_layout()

    if output_path:
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Multinucleated visualization saved to {output_path}")

    plt.show()
    # Release the figure; this function runs once per sample.
    plt.close(fig)

# Function to process a sample for senescence analysis
def process_senescence_analysis(sample_id, file_dict, output_dir):
    """
    Run senescence detection for one sample and write its outputs.

    Outputs are written to <output_dir>/<sample_id>/: the per-cell metrics
    CSV, the adjusted cell mask (uint16 TIFF), two PNG visualizations and a
    one-row summary CSV.

    Parameters:
    sample_id: ID of the sample to process
    file_dict: Dictionary mapping sample IDs to component file paths
    output_dir: Directory to save outputs

    Returns:
    Results from senescence detection, or None when the cell or nuclei image
    could not be loaded. When no valid cells are found the results are
    returned without writing any output files.
    """
    print(f"Processing senescence analysis for sample {sample_id}")

    # Create sample-specific output directory
    sample_output_dir = os.path.join(output_dir, sample_id)
    os.makedirs(sample_output_dir, exist_ok=True)

    def sample_path(suffix):
        # Full path of an output file named "<sample_id><suffix>"
        return os.path.join(sample_output_dir, f"{sample_id}{suffix}")

    images = load_sample_images(sample_id, file_dict)
    if not ('cell' in images and 'nuclei' in images):
        print(f"Error: Required cell or nuclei image not found for sample {sample_id}")
        return None

    cell_img = images['cell']
    nuclei_img = images['nuclei']

    # Run senescence detection
    results = detect_senescent_cells(
        cell_img,
        nuclei_img,
        expected_senescent_fraction=0.3  # From 30% TNF-a treated cells
    )

    total = results['total_cells']
    if total == 0:
        print(f"No valid cells found in sample {sample_id}, skipping output generation")
        return results

    metrics = results['cell_metrics']

    # 1. Per-cell metrics
    metrics_file = sample_path("_senescence_metrics.csv")
    metrics.to_csv(metrics_file, index=False)
    print(f"Saved cell metrics to {metrics_file}")

    # 2. Adjusted cell mask
    adjusted_mask_file = sample_path("_adjusted_cell_mask.tif")
    io.imsave(adjusted_mask_file, results['adjusted_cell_image'].astype(np.uint16))
    print(f"Saved adjusted cell mask to {adjusted_mask_file}")

    # 3. + 4. Visualizations
    visualize_senescence_detection(
        results, cell_img, nuclei_img, sample_path("_senescence_visualization.png"))
    visualize_multinucleated_cells(
        results, cell_img, nuclei_img, sample_path("_multinucleated_visualization.png"))

    # 5. Summary statistics
    senescent_count = results['senescent_count']
    senescent_fraction = results['senescent_fraction']
    summary = {
        'sample_id': sample_id,
        'total_cells': total,
        'senescent_cells': senescent_count,
        'senescent_fraction': senescent_fraction,
        'normal_cells': total - senescent_count,
        'normal_fraction': 1 - senescent_fraction
    }

    # Class comparison columns are only added when both classes are populated
    if not metrics.empty:
        flagged = metrics['is_senescent']
        senescent = metrics[flagged]
        normal = metrics[~flagged]

        if not (senescent.empty or normal.empty):
            senescent_area = senescent['area'].mean()
            normal_area = normal['area'].mean()
            summary['senescent_mean_area'] = senescent_area
            summary['normal_mean_area'] = normal_area
            summary['size_ratio'] = senescent_area / normal_area

            summary['senescent_mean_nuclei'] = senescent['nuclei_count'].mean()
            summary['normal_mean_nuclei'] = normal['nuclei_count'].mean()

            # Fraction of cells in each class with more than one nucleus
            summary['senescent_multi_nuclei_pct'] = (senescent['nuclei_count'] > 1).mean()
            summary['normal_multi_nuclei_pct'] = (normal['nuclei_count'] > 1).mean()

    # Save summary as a one-row CSV
    summary_file = sample_path("_senescence_summary.csv")
    pd.DataFrame([summary]).to_csv(summary_file, index=False)
    print(f"Saved summary to {summary_file}")

    return results

# Function to compile results across all samples
def compile_cross_sample_results(output_dir, sample_ids):
    """Compile results across all analyzed samples.

    For every sample ID the per-sample summary and metrics CSV files are read
    from <output_dir>/<sample_id>/ when they exist. The combined tables are
    written to output_dir together with a bar chart of senescent percentages,
    box plots comparing normal and senescent cells, and an area vs nuclei
    count scatter plot.

    Parameters:
    output_dir: Directory that holds the per-sample folders and receives the combined outputs
    sample_ids: Sample IDs to include

    Returns:
    None
    """
    all_summaries = []
    all_metrics = []

    for sample_id in sample_ids:
        sample_dir = os.path.join(output_dir, sample_id)

        # Read summary file
        summary_file = os.path.join(sample_dir, f"{sample_id}_senescence_summary.csv")
        if os.path.exists(summary_file):
            all_summaries.append(pd.read_csv(summary_file))

        # Read metrics file
        metrics_file = os.path.join(sample_dir, f"{sample_id}_senescence_metrics.csv")
        if os.path.exists(metrics_file):
            metrics = pd.read_csv(metrics_file)
            metrics['sample_id'] = sample_id  # Add sample ID
            all_metrics.append(metrics)

    # Combine all summaries
    if all_summaries:
        combined_summary = pd.concat(all_summaries, ignore_index=True)
        summary_output = os.path.join(output_dir, "all_samples_senescence_summary.csv")
        combined_summary.to_csv(summary_output, index=False)
        print(f"Saved combined summary to {summary_output}")

        # Create a visualization of senescence percentages across samples
        plt.figure(figsize=(12, 6))
        plt.bar(combined_summary['sample_id'],
                combined_summary['senescent_fraction'] * 100)
        plt.xlabel('Sample ID')
        plt.ylabel('Senescent Cells (%)')
        plt.title('Percentage of Senescent Cells Across Samples')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "senescence_percentages.png"), dpi=300)
        plt.close()

    # Combine all metrics
    if all_metrics:
        combined_metrics = pd.concat(all_metrics, ignore_index=True)
        metrics_output = os.path.join(output_dir, "all_samples_cell_metrics.csv")
        combined_metrics.to_csv(metrics_output, index=False)
        print(f"Saved combined metrics to {metrics_output}")

        # Create boxplots comparing senescent vs normal cells
        if 'is_senescent' in combined_metrics.columns:
            senescent = combined_metrics[combined_metrics['is_senescent']]
            normal = combined_metrics[~combined_metrics['is_senescent']]
            both_classes_present = len(senescent) > 0 and len(normal) > 0

            metrics_to_plot = ['area', 'nuclei_count', 'solidity', 'circularity']
            fig, axes = plt.subplots(2, 2, figsize=(14, 12))

            for ax, metric in zip(axes.flatten(), metrics_to_plot):
                if metric in combined_metrics.columns and both_classes_present:
                    ax.boxplot([normal[metric], senescent[metric]])
                    # Tick labels are set on the axis because the boxplot
                    # 'labels' keyword is deprecated since Matplotlib 3.9
                    # (renamed to 'tick_labels').
                    ax.set_xticklabels(['Normal', 'Senescent'])
                    ax.set_ylabel(metric)
                    ax.set_title(f'{metric} Distribution')

            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "senescence_metrics_comparison.png"), dpi=300)
            plt.close()

            # Create scatter plot of area vs nuclei count
            plt.figure(figsize=(10, 8))
            plt.scatter(normal['area'], normal['nuclei_count'],
                      alpha=0.5, label='Normal', color='blue')
            plt.scatter(senescent['area'], senescent['nuclei_count'],
                      alpha=0.5, label='Senescent', color='red')
            plt.xlabel('Cell Area')
            plt.ylabel('Nuclei Count')
            plt.title('Cell Area vs Nuclei Count')
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "area_vs_nuclei_scatter.png"), dpi=300)
            plt.close()

    print("Compiled results across all samples")

# Function to run all processing
def main():
    """Run the senescence analysis for every complete sample below base_dir.

    Samples are discovered with find_matching_files(), analysed one by one
    with process_senescence_analysis() (outputs go to output_dir), and the
    per-sample results are finally merged by compile_cross_sample_results().
    Returns early when no complete sample is found.
    """
    print("Starting senescent cell analysis pipeline...")

    # Discover the samples that have the required images
    file_dict, complete_samples = find_matching_files(base_dir)
    if len(complete_samples) == 0:
        print("No complete samples found. Exiting.")
        return

    for sample_id in complete_samples:
        print(f"\nProcessing sample {sample_id}")
        outcome = process_senescence_analysis(sample_id, file_dict, output_dir)

        # Nothing to report when the sample could not be analysed
        if not outcome:
            continue

        n_senescent = outcome['senescent_count']
        n_total = outcome['total_cells']
        fraction = outcome['senescent_fraction']
        print(f"Completed senescence analysis for sample {sample_id}")
        print(f"Found {n_senescent} senescent cells ({fraction:.1%}) out of {n_total} total cells")

    # Merge the per-sample outputs into cross-sample tables and plots
    compile_cross_sample_results(output_dir, complete_samples)

    print("Senescence analysis pipeline completed!")

# Run the main function
if __name__ == "__main__":
    main()

ValueError: Mountpoint must not already contain files

In [None]:
# Mount Google Drive with force_remount option
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from skimage import io, measure, segmentation, feature
from scipy import ndimage
from collections import Counter
import tifffile
from pathlib import Path
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Google Drive structure
# base_dir holds the segmented masks (one sub-folder per component);
# output_dir receives the analysis results.
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-A-2"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-A-2"

# Create output directory if it doesn't exist
# (this cell calls drive.mount() first, so an exception raised by the mount
# stops the cell before this line is reached)
os.makedirs(output_dir, exist_ok=True)

# Define function to extract sample ID from filenames
def extract_sample_id(filename):
    """Return the part of a mask filename that precedes its component token.

    The name is split on underscores and scanned from left to right. At the
    first token that is exactly 'Nuclei', 'Golgi', 'membrane' or 'cell'
    (case-sensitive), everything before it is re-joined with underscores and
    returned. For example
    'denoised_0Pa_A1_20dec21_40x_L2RA_FlatA_seq007_contrast_Nuclei_mask.tif'
    gives 'denoised_0Pa_A1_20dec21_40x_L2RA_FlatA_seq007_contrast'.

    Returns None when no token matches.
    """
    known_components = ('Nuclei', 'Golgi', 'membrane', 'cell')
    tokens = str(filename).split('_')

    # Index of the first component token, or None when there is none.
    position = next(
        (idx for idx, token in enumerate(tokens) if token in known_components),
        None,
    )
    if position is None:
        return None
    return '_'.join(tokens[:position])

# Function to find matching files across different folders
def find_matching_files(base_dir):
    """Index the '.tif' masks below ``base_dir`` by sample ID and component.

    The sub-folders 'Nuclei', 'Membrane_Adjusted', 'Golgi' and 'Cell' are
    scanned in that order; a missing folder only produces a warning. Every
    file whose name ends in '.tif' and for which extract_sample_id() returns
    a non-empty ID is recorded under that ID.

    Returns:
    (file_dict, complete_samples) where file_dict maps
    sample ID -> {component key -> full file path} with the component keys
    'nuclei', 'membrane', 'golgi' and 'cell', and complete_samples lists the
    sample IDs that have at least a 'nuclei' and a 'cell' entry.
    """
    # Folder name -> key used in the per-sample dictionary. The adjusted
    # membrane folder is stored under the plain 'membrane' key.
    folder_keys = {
        'Nuclei': 'nuclei',
        'Membrane_Adjusted': 'membrane',
        'Golgi': 'golgi',
        'Cell': 'cell',
    }

    file_dict = {}
    for folder, key in folder_keys.items():
        folder_path = os.path.join(base_dir, folder)
        if not os.path.exists(folder_path):
            print(f"Warning: {folder_path} does not exist.")
            continue

        tiff_names = [name for name in os.listdir(folder_path) if name.endswith('.tif')]
        print(f"Found {len(tiff_names)} TIFF files in {folder} folder.")

        for name in tiff_names:
            sample_id = extract_sample_id(name)
            if not sample_id:
                # No recognisable component token (or nothing before it).
                continue
            file_dict.setdefault(sample_id, {})[key] = os.path.join(folder_path, name)

    # A sample is usable as soon as both nuclei and cell masks are present.
    complete_samples = [
        sample_id
        for sample_id, found in file_dict.items()
        if 'nuclei' in found and 'cell' in found
    ]

    print(f"Found {len(complete_samples)} complete samples with at least nuclei and cell data.")

    return file_dict, complete_samples

# Function to load images for a specific sample
def load_sample_images(sample_id, file_dict):
    """Load every component image registered for ``sample_id``.

    Parameters:
    sample_id: Key into file_dict
    file_dict: Dictionary mapping sample IDs to {component: file path}

    Returns:
    Dictionary mapping component name to the loaded image array. Components
    whose file is missing or fails to load are reported on stdout and left
    out of the result.
    """
    loaded = {}
    for name, path in file_dict[sample_id].items():
        if not os.path.exists(path):
            print(f"Warning: File not found - {path}")
            continue

        try:
            image = io.imread(path)

            # Boolean masks become 0/1 integers
            if image.dtype == bool:
                image = image.astype(np.uint8)

            # A cell/nuclei image whose maximum is at most 1 is treated as a
            # binary mask and split into connected, individually labelled regions
            needs_labeling = name in ('cell', 'nuclei') and np.max(image) <= 1
            if needs_labeling:
                print(f"Converting binary {name} image to labeled image")
                image, region_count = ndimage.label(image)
                print(f"Found {region_count} {name} regions")

            loaded[name] = image
            print(f"Loaded {name} image: shape {image.shape}, dtype: {image.dtype}, value range [{np.min(image)}, {np.max(image)}]")
        except Exception as err:
            # Best effort: report the failure and carry on with the other components
            print(f"Error loading {path}: {str(err)}")

    return loaded

# Function to detect senescent cells
def detect_senescent_cells(cell_image, nucleus_image, expected_senescent_fraction=0.3,
                     size_threshold_factor=1.5, holes_ratio_quantile=0.7,
                     min_cell_area=100):
    """
    Detect senescent cells based on multiple features and adjust masks accordingly.

    Every labelled cell with at least min_cell_area pixels gets one point per
    indicator: area above mean + size_threshold_factor * std, more than one
    overlapping nucleus, and a holes-to-cell ratio above the
    holes_ratio_quantile quantile. Cells whose score reaches the
    (1 - expected_senescent_fraction) quantile of all scores, and is at least 1,
    are classified as senescent; their holes are filled in the adjusted image.

    Parameters:
    cell_image: Label image of cells
    nucleus_image: Label image of nuclei (same shape as cell_image)
    expected_senescent_fraction: Expected fraction of senescent cells (default 0.3)
    size_threshold_factor: Factor multiplied by std dev to set size threshold (default 1.5)
    holes_ratio_quantile: Quantile threshold for holes-to-cell ratio (default 0.7)
    min_cell_area: Objects with fewer pixels are ignored as artifacts (default 100)

    Returns:
    Dictionary with keys 'cell_metrics' (DataFrame, one row per analysed cell),
    'original_cell_image', 'adjusted_cell_image', 'senescent_count',
    'total_cells' and 'senescent_fraction'

    Raises:
    ValueError: if cell_image and nucleus_image do not have the same shape
    """
    # Ensure we have integer type images
    cell_image = cell_image.astype(np.int32)
    nucleus_image = nucleus_image.astype(np.int32)

    # The per-cell crops below index both images with the same slices,
    # so a shape mismatch must be rejected explicitly
    if cell_image.shape != nucleus_image.shape:
        raise ValueError(
            "cell_image and nucleus_image must have the same shape, "
            f"got {cell_image.shape} and {nucleus_image.shape}"
        )

    cell_metrics = []
    cell_slices = {}  # cell_id -> bounding-box slices, reused when filling holes

    for cell_prop in measure.regionprops(cell_image):
        # Skip very small objects (likely artifacts)
        if cell_prop.area < min_cell_area:
            continue

        cell_id = int(cell_prop.label)

        # Work inside the bounding box only. A hole is enclosed by the cell on
        # all sides, so it always lies strictly inside the box and the result
        # matches a full-image computation at a fraction of the cost.
        bbox = cell_prop.slice
        cell_mask = cell_prop.image  # boolean mask of this cell within bbox
        cell_slices[cell_id] = bbox

        # Nucleus labels under the cell (background 0 removed)
        nuclei_pixels = nucleus_image[bbox][cell_mask]
        nuclei_pixels = nuclei_pixels[nuclei_pixels > 0]
        nuclei_count = len(np.unique(nuclei_pixels))

        # Total nuclear area within this cell (area >= min_cell_area, so the
        # division is safe whenever min_cell_area is positive)
        nuclear_area = int(nuclei_pixels.size)
        nuclear_cytoplasmic_ratio = nuclear_area / cell_prop.area if cell_prop.area > 0 else 0

        # Holes are the pixels added by filling the cell mask
        filled_mask = ndimage.binary_fill_holes(cell_mask)
        holes_mask = filled_mask & ~cell_mask
        _, num_holes = ndimage.label(holes_mask)
        total_hole_area = int(np.count_nonzero(holes_mask))
        holes_to_cell_ratio = total_hole_area / cell_prop.area if cell_prop.area > 0 else 0

        # Shape metrics
        perimeter = cell_prop.perimeter or 0
        circularity = (4 * np.pi * cell_prop.area) / (perimeter * perimeter) if perimeter > 0 else 0

        cell_metrics.append({
            'cell_id': cell_id,
            'area': cell_prop.area,
            'perimeter': perimeter,
            'circularity': circularity,
            'solidity': cell_prop.solidity,
            'nuclei_count': nuclei_count,
            'nuclear_area': nuclear_area,
            'nuclear_cytoplasmic_ratio': nuclear_cytoplasmic_ratio,
            'num_holes': num_holes,
            'total_hole_area': total_hole_area,
            'holes_to_cell_ratio': holes_to_cell_ratio
        })

    # Handle empty metrics case
    if not cell_metrics:
        print("No valid cells found for analysis")
        return {
            'cell_metrics': pd.DataFrame(),
            'original_cell_image': cell_image,
            'adjusted_cell_image': cell_image.copy(),
            'senescent_count': 0,
            'total_cells': 0,
            'senescent_fraction': 0
        }

    # Convert to DataFrame for easier analysis
    metrics_df = pd.DataFrame(cell_metrics)

    # 1. Cell size threshold (senescent cells are larger)
    size_mean = metrics_df['area'].mean()
    size_std = metrics_df['area'].std()
    if pd.isna(size_std):
        # The sample std of a single cell is undefined; treat it as no spread
        size_std = 0.0
    size_threshold = size_mean + size_threshold_factor * size_std

    # 2. Multinucleation
    multinucleated = metrics_df['nuclei_count'] > 1

    # 3. Holes-to-cell ratio threshold (top 30% by default)
    holes_ratio_threshold = metrics_df['holes_to_cell_ratio'].quantile(holes_ratio_quantile)

    # Log thresholds for debugging
    print(f"Size threshold: {size_threshold:.1f} pixels (mean {size_mean:.1f} + {size_threshold_factor} * std {size_std:.1f})")
    print(f"Holes ratio threshold: {holes_ratio_threshold:.4f} (quantile {holes_ratio_quantile})")

    # One point per senescence indicator
    metrics_df['senescence_score'] = 0
    metrics_df.loc[metrics_df['area'] > size_threshold, 'senescence_score'] += 1
    metrics_df.loc[multinucleated, 'senescence_score'] += 1
    metrics_df.loc[metrics_df['holes_to_cell_ratio'] > holes_ratio_threshold, 'senescence_score'] += 1

    # Dynamically set the score threshold to approach the expected fraction
    scores = metrics_df['senescence_score']
    if scores.nunique() == 1:
        # All cells share one score: all senescent if it is > 0, none if it is 0
        score_threshold = scores.iloc[0]
    else:
        score_threshold = np.quantile(scores, 1 - expected_senescent_fraction)

    # A threshold of 0 would make `score >= threshold` true for every cell,
    # including cells showing no indicator at all; require at least one point.
    score_threshold = max(score_threshold, 1)

    # Classify cells
    metrics_df['is_senescent'] = scores >= score_threshold

    # Create adjusted cell masks by filling holes in senescent cells
    adjusted_cell_image = cell_image.copy()

    for cell_id in metrics_df.loc[metrics_df['is_senescent'], 'cell_id']:
        cell_id = int(cell_id)
        bbox = cell_slices[cell_id]
        filled_mask = ndimage.binary_fill_holes(cell_image[bbox] == cell_id)

        # NOTE(review): pixels of other labels lying inside a hole are
        # overwritten with this cell's id, exactly as before - confirm that
        # enclosed objects are meant to be absorbed.
        region = adjusted_cell_image[bbox]  # view: writes go to adjusted_cell_image
        region[filled_mask] = cell_id

    # Calculate statistics
    senescent_count = int(metrics_df['is_senescent'].sum())
    total_cells = len(metrics_df)
    senescent_fraction = senescent_count / total_cells if total_cells > 0 else 0

    print(f"Detected {senescent_count} senescent cells out of {total_cells} total cells ({senescent_fraction:.2%})")

    return {
        'cell_metrics': metrics_df,
        'original_cell_image': cell_image,
        'adjusted_cell_image': adjusted_cell_image,
        'senescent_count': senescent_count,
        'total_cells': total_cells,
        'senescent_fraction': senescent_fraction
    }

# Function to visualize senescence detection results
def visualize_senescence_detection(results, cell_image, nucleus_image, output_path=None):
    """
    Visualize the senescence detection results in a 2x2 figure.

    Panels: cell/nucleus boundaries, senescent vs normal classification,
    original vs adjusted cell boundaries, and a box plot of cell area.
    The figure is shown and then closed so that repeated calls do not
    accumulate open figures.

    Parameters:
    results: Results from detect_senescent_cells
    cell_image: Original cell label image
    nucleus_image: Original nucleus label image
    output_path: Path to save the visualization (not saved when None)

    Returns:
    None
    """
    # Check if we have valid results
    if results['total_cells'] == 0:
        print("No valid cells to visualize")
        return

    fig, axes = plt.subplots(2, 2, figsize=(16, 14))

    # 1. Original cell image with nuclei overlay
    cell_boundary = segmentation.find_boundaries(cell_image)
    nucleus_boundary = segmentation.find_boundaries(nucleus_image)

    overlay = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    overlay[cell_boundary] = (0, 255, 0)      # Cell boundaries in green
    overlay[nucleus_boundary] = (0, 0, 255)   # Nucleus boundaries in blue (drawn on top)

    axes[0, 0].imshow(overlay)
    axes[0, 0].set_title("Original Cells (green) and Nuclei (blue)")
    axes[0, 0].axis('off')

    # 2. Senescent vs normal cells
    cell_metrics = results['cell_metrics']
    senescent_metrics = cell_metrics[cell_metrics['is_senescent']]
    normal_metrics = cell_metrics[~cell_metrics['is_senescent']]

    # One membership test per class instead of one full-image comparison per cell
    senescent_mask = np.isin(cell_image, senescent_metrics['cell_id'].to_numpy())
    normal_mask = np.isin(cell_image, normal_metrics['cell_id'].to_numpy())

    classification = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    classification[senescent_mask] = (255, 0, 0)  # Senescent cells in red
    classification[normal_mask] = (0, 255, 0)     # Normal cells in green

    axes[0, 1].imshow(classification)
    axes[0, 1].set_title(f"Senescent Cells (red): {results['senescent_fraction']:.1%}")
    axes[0, 1].axis('off')

    # 3. Adjusted cell masks
    adjusted_boundary = segmentation.find_boundaries(results['adjusted_cell_image'])

    adjusted_overlay = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    adjusted_overlay[cell_boundary] = (0, 255, 0)        # Original boundaries in green
    adjusted_overlay[adjusted_boundary] = (255, 255, 0)  # Adjusted boundaries in yellow (on top)

    axes[1, 0].imshow(adjusted_overlay)
    axes[1, 0].set_title("Original (green) vs Adjusted (yellow) Cell Boundaries")
    axes[1, 0].axis('off')

    # 4. Metrics visualization (one key metric)
    metric = 'area'

    if len(senescent_metrics) > 0 and len(normal_metrics) > 0:
        axes[1, 1].boxplot([normal_metrics[metric], senescent_metrics[metric]],
                          labels=['Normal', 'Senescent'])
        axes[1, 1].set_ylabel(f'Cell {metric}')
        axes[1, 1].set_title(f'Distribution of {metric} by Cell Type')
    else:
        axes[1, 1].text(0.5, 0.5, "Not enough data for box plot",
                        ha='center', va='center', transform=axes[1, 1].transAxes)

    # Add metrics summary as text
    if len(senescent_metrics) > 0:
        sen_text = (f"Senescent cells (n={len(senescent_metrics)}):\n" +
                   f"Mean area: {senescent_metrics['area'].mean():.1f}\n" +
                   f"Mean nuclei count: {senescent_metrics['nuclei_count'].mean():.1f}")
    else:
        sen_text = "No senescent cells detected"

    if len(normal_metrics) > 0:
        norm_text = (f"Normal cells (n={len(normal_metrics)}):\n" +
                    f"Mean area: {normal_metrics['area'].mean():.1f}\n" +
                    f"Mean nuclei count: {normal_metrics['nuclei_count'].mean():.1f}")
    else:
        norm_text = "No normal cells detected"

    fig.text(0.02, 0.02, sen_text, fontsize=10)
    fig.text(0.52, 0.02, norm_text, fontsize=10)

    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Visualization saved to {output_path}")

    plt.show()
    # Release the figure; this function is called once per sample
    plt.close(fig)

# Function to create additional visualizations for multi-nucleated cells
def visualize_multinucleated_cells(results, cell_image, nucleus_image, output_path=None):
    """
    Create visualization specifically highlighting multinucleated cells.

    Left panel: cells colored by nuclei count with white nucleus boundaries.
    Right panel: bar chart of cells with 0, 1, 2 and 3+ nuclei. The figure is
    shown and then closed so that repeated calls do not accumulate open figures.

    Parameters:
    results: Results from detect_senescent_cells
    cell_image: Original cell label image
    nucleus_image: Original nucleus label image
    output_path: Path to save the visualization (not saved when None)

    Returns:
    None
    """
    if results['total_cells'] == 0:
        print("No valid cells to visualize")
        return

    cell_metrics = results['cell_metrics']

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    # Color scheme, keyed by nuclei count capped at 3 ("3 or more")
    color_map = {
        0: [100, 100, 100],  # Gray
        1: [0, 0, 255],      # Blue
        2: [0, 255, 0],      # Green
        3: [255, 0, 0]       # Red (3 or more)
    }

    # Integer counts capped at 3 so they can be used as color keys and bar bins
    capped_counts = cell_metrics['nuclei_count'].astype(int).clip(upper=3)

    # One membership test per color category instead of one pass per cell
    multi_colored = np.zeros((*cell_image.shape, 3), dtype=np.uint8)
    for category, color in color_map.items():
        category_ids = cell_metrics.loc[capped_counts == category, 'cell_id'].to_numpy()
        multi_colored[np.isin(cell_image, category_ids)] = color

    # Draw nuclei boundaries
    nucleus_boundary = segmentation.find_boundaries(nucleus_image)
    multi_colored[nucleus_boundary] = [255, 255, 255]  # White nucleus boundaries

    axes[0].imshow(multi_colored)
    axes[0].set_title("Cell Nuclei Count\nGray: 0, Blue: 1, Green: 2, Red: 3+")
    axes[0].axis('off')

    # Number of cells with 0, 1, 2 and 3+ nuclei (zero when a bin is empty)
    counts = [int((capped_counts == category).sum()) for category in range(4)]

    bars = axes[1].bar(['0', '1', '2', '3+'], counts, color=['gray', 'blue', 'green', 'red'])

    # Add count labels on bars
    for bar in bars:
        height = bar.get_height()
        axes[1].text(bar.get_x() + bar.get_width()/2., height + 0.1,
                    f'{int(height)}', ha='center', va='bottom')

    axes[1].set_xlabel('Number of Nuclei')
    axes[1].set_ylabel('Number of Cells')
    axes[1].set_title('Distribution of Nuclei Count')

    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Multinucleated visualization saved to {output_path}")

    plt.show()
    # Release the figure; this function is called once per sample
    plt.close(fig)

# Function to process a sample for senescence analysis
def process_senescence_analysis(sample_id, file_dict, output_dir):
    """
    Process a sample for senescence analysis.

    Loads the sample's images, runs detect_senescent_cells and writes, into
    <output_dir>/<sample_id>/, the per-cell metrics CSV, the adjusted cell
    mask (TIFF), two visualizations (PNG) and a one-row summary CSV.

    Parameters:
    sample_id: ID of the sample to process
    file_dict: Dictionary mapping sample IDs to component file paths
    output_dir: Directory to save outputs

    Returns:
    Results from senescence detection, or None when the cell or nuclei image
    could not be loaded. When no valid cells are found the results are
    returned without writing any output file.
    """
    print(f"Processing senescence analysis for sample {sample_id}")

    # Create sample-specific output directory
    sample_output_dir = os.path.join(output_dir, sample_id)
    os.makedirs(sample_output_dir, exist_ok=True)

    # Load images
    images = load_sample_images(sample_id, file_dict)

    if 'cell' not in images or 'nuclei' not in images:
        print(f"Error: Required cell or nuclei image not found for sample {sample_id}")
        return None

    # Run senescence detection
    results = detect_senescent_cells(
        images['cell'],
        images['nuclei'],
        expected_senescent_fraction=0.3  # From 30% TNF-a treated cells
    )

    # Skip saving if no valid cells found
    if results['total_cells'] == 0:
        print(f"No valid cells found in sample {sample_id}, skipping output generation")
        return results

    cell_metrics = results['cell_metrics']

    # 1. Save metrics as CSV
    metrics_file = os.path.join(sample_output_dir, f"{sample_id}_senescence_metrics.csv")
    cell_metrics.to_csv(metrics_file, index=False)
    print(f"Saved cell metrics to {metrics_file}")

    # 2. Save adjusted cell mask
    adjusted_mask_file = os.path.join(sample_output_dir, f"{sample_id}_adjusted_cell_mask.tif")
    adjusted_mask = results['adjusted_cell_image']
    # uint16 holds label ids up to 65535; widen rather than let larger ids wrap
    fits_uint16 = adjusted_mask.max() <= np.iinfo(np.uint16).max
    mask_dtype = np.uint16 if fits_uint16 else np.uint32
    io.imsave(adjusted_mask_file, adjusted_mask.astype(mask_dtype))
    print(f"Saved adjusted cell mask to {adjusted_mask_file}")

    # 3. Create and save visualization
    vis_file = os.path.join(sample_output_dir, f"{sample_id}_senescence_visualization.png")
    visualize_senescence_detection(results, images['cell'], images['nuclei'], vis_file)

    # 4. Create and save multinucleated visualization
    multi_vis_file = os.path.join(sample_output_dir, f"{sample_id}_multinucleated_visualization.png")
    visualize_multinucleated_cells(results, images['cell'], images['nuclei'], multi_vis_file)

    # 5. Summary statistics
    summary = {
        'sample_id': sample_id,
        'total_cells': results['total_cells'],
        'senescent_cells': results['senescent_count'],
        'senescent_fraction': results['senescent_fraction'],
        'normal_cells': results['total_cells'] - results['senescent_count'],
        'normal_fraction': 1 - results['senescent_fraction']
    }

    # Group comparisons only make sense when both groups are populated
    # (cell_metrics is never empty here: total_cells > 0 was checked above)
    senescent = cell_metrics[cell_metrics['is_senescent']]
    normal = cell_metrics[~cell_metrics['is_senescent']]

    if not senescent.empty and not normal.empty:
        summary['senescent_mean_area'] = senescent['area'].mean()
        summary['normal_mean_area'] = normal['area'].mean()
        summary['size_ratio'] = summary['senescent_mean_area'] / summary['normal_mean_area']

        summary['senescent_mean_nuclei'] = senescent['nuclei_count'].mean()
        summary['normal_mean_nuclei'] = normal['nuclei_count'].mean()

        summary['senescent_multi_nuclei_pct'] = (senescent['nuclei_count'] > 1).mean()
        summary['normal_multi_nuclei_pct'] = (normal['nuclei_count'] > 1).mean()

    # Save summary as CSV
    summary_df = pd.DataFrame([summary])
    summary_file = os.path.join(sample_output_dir, f"{sample_id}_senescence_summary.csv")
    summary_df.to_csv(summary_file, index=False)
    print(f"Saved summary to {summary_file}")

    return results

# Function to compile results across all samples
def compile_cross_sample_results(output_dir, sample_ids):
    """
    Compile results across all analyzed samples.

    Reads each sample's summary and metrics CSV from <output_dir>/<sample_id>/
    (samples without files are skipped), writes the combined CSVs into
    output_dir and saves the cross-sample plots next to them.

    Parameters:
    output_dir: Directory holding the per-sample folders; combined outputs go here
    sample_ids: IDs of the samples to compile
    """
    summary_frames = []
    metric_frames = []

    for sid in sample_ids:
        folder = os.path.join(output_dir, sid)

        summary_path = os.path.join(folder, f"{sid}_senescence_summary.csv")
        if os.path.exists(summary_path):
            summary_frames.append(pd.read_csv(summary_path))

        metrics_path = os.path.join(folder, f"{sid}_senescence_metrics.csv")
        if os.path.exists(metrics_path):
            # Tag every cell row with the sample it came from
            metric_frames.append(pd.read_csv(metrics_path).assign(sample_id=sid))

    # ---- Per-sample summaries ----
    if summary_frames:
        combined_summary = pd.concat(summary_frames, ignore_index=True)
        summary_output = os.path.join(output_dir, "all_samples_senescence_summary.csv")
        combined_summary.to_csv(summary_output, index=False)
        print(f"Saved combined summary to {summary_output}")

        # Bar chart of the senescent percentage of every sample
        percentages = combined_summary['senescent_fraction'] * 100
        plt.figure(figsize=(12, 6))
        plt.bar(combined_summary['sample_id'], percentages)
        plt.xlabel('Sample ID')
        plt.ylabel('Senescent Cells (%)')
        plt.title('Percentage of Senescent Cells Across Samples')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, "senescence_percentages.png"), dpi=300)
        plt.close()

    # ---- Per-cell metrics ----
    if metric_frames:
        combined_metrics = pd.concat(metric_frames, ignore_index=True)
        metrics_output = os.path.join(output_dir, "all_samples_cell_metrics.csv")
        combined_metrics.to_csv(metrics_output, index=False)
        print(f"Saved combined metrics to {metrics_output}")

        if 'is_senescent' in combined_metrics.columns:
            flagged = combined_metrics['is_senescent']
            senescent = combined_metrics[flagged]
            normal = combined_metrics[~flagged]
            both_groups_present = len(senescent) > 0 and len(normal) > 0

            # Box plots: one panel per metric, normal vs senescent
            fig, axes = plt.subplots(2, 2, figsize=(14, 12))
            panels = axes.flatten()

            for panel, metric in zip(panels, ['area', 'nuclei_count', 'solidity', 'circularity']):
                if metric in combined_metrics.columns and both_groups_present:
                    panel.boxplot([normal[metric], senescent[metric]],
                                  labels=['Normal', 'Senescent'])
                    panel.set_ylabel(metric)
                    panel.set_title(f'{metric} Distribution')

            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "senescence_metrics_comparison.png"), dpi=300)
            plt.close()

            # Scatter plot of area vs nuclei count, normal first then senescent
            plt.figure(figsize=(10, 8))
            for group, group_label, group_color in ((normal, 'Normal', 'blue'),
                                                    (senescent, 'Senescent', 'red')):
                plt.scatter(group['area'], group['nuclei_count'],
                            alpha=0.5, label=group_label, color=group_color)
            plt.xlabel('Cell Area')
            plt.ylabel('Nuclei Count')
            plt.title('Cell Area vs Nuclei Count')
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, "area_vs_nuclei_scatter.png"), dpi=300)
            plt.close()

    print("Compiled results across all samples")

# Function to run all processing
def main():
    """
    Run the full pipeline: discover samples under base_dir, analyse each
    complete sample, then compile the cross-sample results in output_dir.
    """
    print("Starting senescent cell analysis pipeline...")

    # Discover the files of every sample
    file_dict, complete_samples = find_matching_files(base_dir)

    if not complete_samples:
        print("No complete samples found. Exiting.")
        return

    # Analyse the samples one by one
    for sample_id in complete_samples:
        print(f"\nProcessing sample {sample_id}")
        senescence_results = process_senescence_analysis(sample_id, file_dict, output_dir)

        # Nothing to report when the sample could not be analysed
        if not senescence_results:
            continue

        count = senescence_results['senescent_count']
        fraction = senescence_results['senescent_fraction']
        total = senescence_results['total_cells']
        print(f"Completed senescence analysis for sample {sample_id}")
        print(f"Found {count} senescent cells ({fraction:.1%}) out of {total} total cells")

    # Aggregate the per-sample outputs
    compile_cross_sample_results(output_dir, complete_samples)

    print("Senescence analysis pipeline completed!")

# Entry point: run the pipeline only when this file is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

ValueError: Mountpoint must not already contain files