# In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import re
from scipy import ndimage
from skimage import io, measure
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import umap
from tqdm import tqdm

# Function to extract sample ID from new filename structure
def extract_sample_id(filename):
    """
    Derive the sample ID that a cell mask and its nuclei mask have in common.

    The extension and an optional leading 'denoised_' marker are dropped, then
    the shortest leading run of the name that ends in '_seq<digits>' is returned.

    Examples:
    - '0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012_cell_mask_merged_conservative.tif'
      -> '0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012'
    - 'denoised_0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012_Nuclei_filtered_mask.tif'
      -> '0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012'

    If no '_seq<digits>' is present, the name is cut after the first
    underscore-separated token (other than the very first one) that starts with
    'seq'. If that also fails, a warning is printed and the first seven
    underscore-separated tokens are returned.
    """
    stem, _extension = os.path.splitext(filename)

    # A leading "denoised_" marker is not part of the sample ID.
    denoised_prefix = 'denoised_'
    if stem.startswith(denoised_prefix):
        stem = stem[len(denoised_prefix):]

    # Preferred route: lazy match, so the ID stops at the first "_seq<digits>".
    seq_match = re.search(r'(.+?_seq\d+)', stem)
    if seq_match is not None:
        return seq_match.group(1)

    # Secondary route: a token starting with "seq" (but without digits right
    # after it) still marks the end of the ID, unless it is the first token.
    tokens = stem.split('_')
    for position in range(1, len(tokens)):
        if tokens[position].startswith('seq'):
            return '_'.join(tokens[:position + 1])

    # Last resort: a fixed-length heuristic on the number of tokens.
    print(f"Warning: Could not robustly extract sample ID from '{filename}'. Using fallback.")
    return '_'.join(tokens[:7])

def find_mask_files(cell_dir, nuclei_dir):
    """
    Find cell and nuclei mask files and pair them by extracted sample ID.

    Files are considered masks when their name ends in '.tif' or '.tiff'
    (case-insensitive) and does not start with a dot.

    Args:
        cell_dir: Directory containing the cell mask files.
        nuclei_dir: Directory containing the nuclei mask files.

    Returns:
        A list of dicts with keys 'cell_file' and 'nuclei_file' (paths joined
        with their directory) and 'sample_id'. There is one entry per cell file
        whose sample ID also occurs among the nuclei files, ordered by cell
        file name.
    """
    print("\n--- Finding and Pairing Mask Files ---")

    def list_mask_files(directory):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # pairing (and everything derived from it) reproducible between runs.
        return sorted(
            f for f in os.listdir(directory)
            if f.lower().endswith(('.tif', '.tiff')) and not f.startswith('.')
        )

    cell_files = list_mask_files(cell_dir)
    nuclei_files = list_mask_files(nuclei_dir)

    print(f"Found {len(cell_files)} cell mask files and {len(nuclei_files)} nuclei mask files")

    # Lookup: sample ID -> nuclei file name.
    nuclei_lookup = {}
    for nuclei_file in nuclei_files:
        sample_id = extract_sample_id(nuclei_file)
        if not sample_id:
            print(f"Warning: Could not extract sample ID for nuclei file: {nuclei_file}")
            continue
        if sample_id in nuclei_lookup:
            # Previously a later file silently replaced the earlier one.
            print(f"Warning: Multiple nuclei files share sample ID '{sample_id}'. "
                  f"Keeping '{nuclei_lookup[sample_id]}', ignoring '{nuclei_file}'.")
            continue
        nuclei_lookup[sample_id] = nuclei_file

    # Match every cell file against the nuclei lookup.
    file_pairs = []
    for cell_file in cell_files:
        sample_id = extract_sample_id(cell_file)

        if not sample_id:
            print(f"Warning: Could not extract sample ID for cell file: {cell_file}")
        elif sample_id not in nuclei_lookup:
            print(f"Warning: No matching nuclei file found for cell sample ID: {sample_id} (from file {cell_file})")
        else:
            file_pairs.append({
                'cell_file': os.path.join(cell_dir, cell_file),
                'nuclei_file': os.path.join(nuclei_dir, nuclei_lookup[sample_id]),
                'sample_id': sample_id,
            })

    pairs_found = len(file_pairs)
    print(f"Total matching cell-nuclei file pairs found: {pairs_found}")
    if pairs_found == 0 and (cell_files or nuclei_files):
        # Show a few extracted IDs so a naming mismatch is easy to diagnose.
        print("CRITICAL WARNING: No file pairs were matched. Please check the `extract_sample_id` function and your filenames.")
        print("Example cell filenames:")
        for cf in cell_files[:3]:
            print(f"  - {cf} -> extracted ID: {extract_sample_id(cf)}")
        print("Example nuclei filenames:")
        for nf in nuclei_files[:3]:
            print(f"  - {nf} -> extracted ID: {extract_sample_id(nf)}")

    return file_pairs

def _to_label_dtype(labels):
    """Cast a label array to uint16, widening to uint32 when a label would not fit."""
    if labels.size and labels.max() > np.iinfo(np.uint16).max:
        return labels.astype(np.uint32)
    return labels.astype(np.uint16)


def _mask_array_to_labels(img, filepath):
    """Turn a raw mask array into a labeled integer mask (0 = background)."""
    img = np.asarray(img)

    # Reduce extra axes BEFORE thresholding/labeling. Labeling first would run
    # connected components on the full N-D array and could join objects
    # through the channel axis.
    if img.ndim > 2:
        print(f"Warning: Image {filepath} has multiple channels ({img.shape}). Taking first channel.")
        img = img[..., 0]

    kind = img.dtype.kind
    if kind == 'b':
        binary = img
    elif kind == 'f':
        binary = img > 0.5  # Assuming >0.5 is foreground
    elif kind in 'iu':
        foreground = img[img > 0]
        if foreground.size and foreground.min() != foreground.max():
            # Several distinct positive values: already a label image.
            return _to_label_dtype(img)
        # At most one distinct positive value (0/1, 0/255, ...): treat as a
        # binary mask and label its connected components.
        binary = img > 0
    else:
        print(f"Warning: Unhandled image type {img.dtype} for {filepath}. Attempting binary conversion and labeling.")
        try:
            binary = img > np.median(img)  # Basic thresholding
            labels, _ = ndimage.label(binary)
            return _to_label_dtype(labels)
        except Exception as e_conv:
            print(f"Error converting and labeling image {filepath}: {str(e_conv)}")
            return None

    labels, _ = ndimage.label(binary)
    return _to_label_dtype(labels)


def load_mask_image(filepath):
    """
    Load a mask image and return it as a labeled integer mask.

    Handling by pixel type, after any extra axes have been dropped by taking
    index 0 of the last axis:
      - bool: connected components are labeled;
      - float: values > 0.5 are foreground, then connected components are labeled;
      - integer with more than one distinct positive value: kept as a label image;
      - integer with at most one distinct positive value (e.g. 0/1 or 0/255):
        treated as binary and connected components are labeled;
      - anything else: thresholded at the median, then labeled.

    Args:
        filepath: Path of the image to read.

    Returns:
        A uint16 array (uint32 if a label exceeds 65535) where 0 is background,
        or None if the file could not be read or converted (the error is printed).
    """
    try:
        img = io.imread(filepath)
        return _mask_array_to_labels(img, filepath)
    except Exception as e:
        print(f"Error loading image {filepath}: {str(e)}")
        return None

def accurately_track_nuclei_in_cells(cell_mask, nuclei_mask):
    """
    Assign each nucleus to the cell containing most of it and collect shape data.

    A nucleus is assigned to the cell label that covers the largest share of
    its pixels, provided that share is strictly greater than 50% of the nucleus
    area. Nuclei that do not reach this threshold are left out of the results.

    Args:
        cell_mask: 2-D integer label image of cells (0 = background).
        nuclei_mask: 2-D integer label image of nuclei (0 = background) with
            the same shape as ``cell_mask``.

    Returns:
        dict with
          - 'cell_data': one dict per cell (label as 'cell_id', shape
            descriptors, centroid, 'nuclei_count', 'nuclei_ids_in_cell');
          - 'nuclei_data': one dict per assigned nucleus ('nucleus_id',
            owning 'cell_id', area, eccentricity, centroid, 'overlap_ratio');
          - 'cell_nuclei_mapping': cell label -> list of assigned nucleus labels.
        All three are empty when either mask has no labeled object.

    Raises:
        ValueError: if both masks contain objects but their shapes differ.
    """
    labeled_cells = np.asarray(cell_mask)
    labeled_nuclei = np.asarray(nuclei_mask)

    # The maximum label is only an upper bound on the object count: labels may
    # have gaps (e.g. after filtering), hence "potential" in the message.
    max_cell_label = int(labeled_cells.max()) if labeled_cells.size else 0
    max_nucleus_label = int(labeled_nuclei.max()) if labeled_nuclei.size else 0

    print(f"Found {max_cell_label} potential cell objects and {max_nucleus_label} potential nuclei objects.")
    if max_cell_label == 0 or max_nucleus_label == 0:
        print("Warning: No cells or no nuclei found in the masks.")
        return {'cell_data': [], 'nuclei_data': [], 'cell_nuclei_mapping': {}}

    if labeled_cells.shape != labeled_nuclei.shape:
        # Overlaps are computed pixel-by-pixel, so the masks must be aligned.
        raise ValueError(
            f"Cell mask shape {labeled_cells.shape} does not match nuclei mask shape {labeled_nuclei.shape}."
        )

    cell_props = measure.regionprops(labeled_cells)
    nuclei_props = measure.regionprops(labeled_nuclei)

    results = {'cell_data': [], 'nuclei_data': [], 'cell_nuclei_mapping': {}}
    cell_data_by_id = {}  # cell label -> its dict in results['cell_data']

    # Collect per-cell shape data.
    for cell_region in cell_props:
        cell_id = cell_region.label  # Integer label from the mask
        orientation = getattr(cell_region, 'orientation', None)
        cell_data = {
            'cell_id': cell_id,
            'area': cell_region.area,
            'perimeter': cell_region.perimeter,
            'eccentricity': getattr(cell_region, 'eccentricity', 0),
            'orientation': np.degrees(orientation) if orientation is not None else None,
            'major_axis_length': getattr(cell_region, 'major_axis_length', None),
            'minor_axis_length': getattr(cell_region, 'minor_axis_length', None),
            'centroid_y': cell_region.centroid[0],
            'centroid_x': cell_region.centroid[1],
            'nuclei_count': 0,
            'nuclei_ids_in_cell': [],  # Labels of nuclei assigned to this cell
        }
        results['cell_data'].append(cell_data)
        results['cell_nuclei_mapping'][cell_id] = []
        cell_data_by_id[cell_id] = cell_data

    # Assign nuclei to cells by pixel overlap.
    for nucleus_region in nuclei_props:
        nucleus_id = nucleus_region.label
        nucleus_area = nucleus_region.area

        # All pixels of a nucleus lie inside its bounding box, so the overlap
        # can be counted on the cropped cell mask instead of the full image.
        min_row, min_col, max_row, max_col = nucleus_region.bbox
        cell_labels_under_nucleus = labeled_cells[min_row:max_row, min_col:max_col][nucleus_region.image]
        cell_labels_under_nucleus = cell_labels_under_nucleus[cell_labels_under_nucleus > 0]
        if cell_labels_under_nucleus.size == 0:
            continue  # Nucleus lies entirely on background

        candidate_labels, overlap_areas = np.unique(cell_labels_under_nucleus, return_counts=True)
        best = int(np.argmax(overlap_areas))  # First maximum = lowest label on ties
        contained_in_cell_id = int(candidate_labels[best])
        max_overlap_ratio = float(overlap_areas[best] / nucleus_area)

        # Assign nucleus only if more than 50% of it lies in that cell.
        if max_overlap_ratio <= 0.5:
            continue

        results['nuclei_data'].append({
            'nucleus_id': nucleus_id,
            'cell_id': contained_in_cell_id,
            'area': nucleus_area,
            'eccentricity': getattr(nucleus_region, 'eccentricity', 0),
            'centroid_y': nucleus_region.centroid[0],
            'centroid_x': nucleus_region.centroid[1],
            'overlap_ratio': max_overlap_ratio,
        })

        owner = cell_data_by_id[contained_in_cell_id]
        owner['nuclei_count'] += 1
        owner['nuclei_ids_in_cell'].append(nucleus_id)
        results['cell_nuclei_mapping'][contained_in_cell_id].append(nucleus_id)

    # Summary printouts. Percentages use the actual number of cell regions,
    # not the maximum label, so they stay correct when labels have gaps.
    total_cells = len(results['cell_data'])
    cells_with_nuclei_count = sum(1 for cd in results['cell_data'] if cd['nuclei_count'] > 0)
    if total_cells > 0:
        print(f"Cells with at least one nucleus: {cells_with_nuclei_count}/{total_cells} ({100*cells_with_nuclei_count/total_cells:.1f}% of cells)")
        nuclei_counts_dist = [cd['nuclei_count'] for cd in results['cell_data']]
        for count_val in sorted(set(nuclei_counts_dist)):
            num_cells_with_this_count = nuclei_counts_dist.count(count_val)
            print(f"  Cells with {count_val} nuclei: {num_cells_with_this_count} ({100*num_cells_with_this_count/total_cells:.1f}%)")
    else:
        print("No cells found to analyze nuclei counts.")

    return results


def extract_features_for_cell(cell_data_item, cell_nuclei_map, nuclei_data_list,
                              avg_normal_nucleus_area=500, avg_normal_cell_area=2000):
    """
    Extract morphometric features for one cell and the nuclei assigned to it.

    Args:
        cell_data_item: Dict for one cell. Reads 'cell_id', 'area', 'perimeter',
            'centroid_x', 'centroid_y' and, if present, 'eccentricity',
            'major_axis_length' and 'minor_axis_length'.
        cell_nuclei_map: Mapping from cell label to the list of nucleus labels
            assigned to that cell.
        nuclei_data_list: List of nucleus dicts. Reads 'nucleus_id', 'cell_id',
            'area', 'centroid_x', 'centroid_y' and, if present, 'eccentricity'.
        avg_normal_nucleus_area: Reference nucleus area used as the denominator
            of 'nuclear_enlargement_factor'. The default (500) is a
            placeholder; calibrate it from control data where possible.
        avg_normal_cell_area: Reference cell area used as the denominator of
            'cell_enlargement_factor'. The default (2000) is a placeholder.

    Returns:
        Dict of feature name -> value. Nuclear features are 0 when the cell has
        no assigned nuclei; the enlargement factors fall back to 1.0 when the
        measured or reference area is not positive.
    """
    features = {}

    # Basic cell features
    cell_area = cell_data_item['area']
    cell_perimeter = cell_data_item['perimeter']
    features['cell_area'] = cell_area
    features['cell_perimeter'] = cell_perimeter
    features['cell_eccentricity'] = cell_data_item.get('eccentricity', 0)

    # Circularity = 4*pi*A / P^2 (guarded against a zero perimeter).
    if cell_perimeter > 0:
        features['cell_circularity'] = 4 * np.pi * cell_area / (cell_perimeter ** 2)
    else:
        features['cell_circularity'] = 0

    major_axis = cell_data_item.get('major_axis_length')
    minor_axis = cell_data_item.get('minor_axis_length')
    if major_axis is not None and minor_axis is not None and minor_axis > 0:
        features['cell_aspect_ratio'] = major_axis / minor_axis
    else:
        features['cell_aspect_ratio'] = 1.0

    # Nuclear features: the count comes from the mapping, the details from the list.
    original_cell_id_label = cell_data_item['cell_id']
    nucleus_original_labels_in_cell = cell_nuclei_map.get(original_cell_id_label, [])
    features['nuclei_count'] = len(nucleus_original_labels_in_cell)

    # Defaults, kept when the cell has no nuclei or no details are found.
    features['avg_nucleus_area'] = 0
    features['total_nuclear_area'] = 0
    features['max_nucleus_area'] = 0
    features['avg_nucleus_eccentricity'] = 0
    features['nucleus_area_std'] = 0
    features['nucleus_displacement_avg'] = 0
    features['nucleus_to_cell_area_ratio'] = 0

    if features['nuclei_count'] > 0:
        # A set makes the membership test O(1) per nucleus in the list.
        assigned_labels = set(nucleus_original_labels_in_cell)
        cell_nuclei_details = [
            n_data for n_data in nuclei_data_list
            if n_data['nucleus_id'] in assigned_labels and n_data['cell_id'] == original_cell_id_label
        ]

        if cell_nuclei_details:
            nuclear_areas = [n['area'] for n in cell_nuclei_details]
            nuclear_eccentricities = [n.get('eccentricity', 0) for n in cell_nuclei_details]

            features['avg_nucleus_area'] = np.mean(nuclear_areas)
            features['total_nuclear_area'] = sum(nuclear_areas)
            features['max_nucleus_area'] = max(nuclear_areas)
            features['avg_nucleus_eccentricity'] = np.mean(nuclear_eccentricities)
            features['nucleus_area_std'] = np.std(nuclear_areas) if len(nuclear_areas) > 1 else 0

            if features['cell_area'] > 0:
                features['nucleus_to_cell_area_ratio'] = features['total_nuclear_area'] / features['cell_area']

            # Average Euclidean distance of nucleus centroids from the cell centroid.
            displacements = []
            for nucleus_detail in cell_nuclei_details:
                dx = nucleus_detail['centroid_x'] - cell_data_item['centroid_x']
                dy = nucleus_detail['centroid_y'] - cell_data_item['centroid_y']
                displacements.append(np.sqrt(dx**2 + dy**2))
            features['nucleus_displacement_avg'] = np.mean(displacements)
        else:
            print(f"Warning: Cell {original_cell_id_label} reported {features['nuclei_count']} nuclei, but no matching details found in nuclei_data_list.")

    # Derived features for senescence, relative to the reference areas.
    features['is_polynucleated'] = 1 if features['nuclei_count'] > 1 else 0
    if avg_normal_nucleus_area > 0 and features['avg_nucleus_area'] > 0:
        features['nuclear_enlargement_factor'] = features['avg_nucleus_area'] / avg_normal_nucleus_area
    else:
        features['nuclear_enlargement_factor'] = 1.0
    if avg_normal_cell_area > 0 and features['cell_area'] > 0:
        features['cell_enlargement_factor'] = features['cell_area'] / avg_normal_cell_area
    else:
        features['cell_enlargement_factor'] = 1.0

    return features


def perform_clustering(all_features_df, umap_n_neighbors=30, umap_min_dist=0.1, umap_random_state=42, kmeans_random_state=42):
    """
    Reduce cell features to 2-D with UMAP, cluster with k-Means (k=2) and label cells.

    Steps: select numeric feature columns (ID and result columns are skipped),
    fill missing values, log-transform area/perimeter features, standardize,
    embed with UMAP, cluster the embedding into two groups, call the cluster
    with the larger mean 'cell_area' 'Senescent', and finally mark every cell
    with 'nuclei_count' > 1 as 'Senescent'.

    Args:
        all_features_df: One row per cell. It is not modified.
        umap_n_neighbors: Requested UMAP n_neighbors (capped at n_samples - 1).
        umap_min_dist: UMAP min_dist.
        umap_random_state: Seed passed to UMAP.
        kmeans_random_state: Seed passed to KMeans.

    Returns:
        - an empty DataFrame if the input is empty;
        - the input DataFrame itself if it has no usable numeric feature column;
        - otherwise a copy of the input with 'umap_x', 'umap_y', 'cluster' and
          'cell_type' added (NaN / 'Unknown' when there are too few samples).
    """
    print("Starting clustering process...")
    if all_features_df.empty:
        print("Input DataFrame is empty. Cannot perform clustering.")
        return pd.DataFrame()

    # Work on a copy so numeric coercion below never alters the caller's frame.
    clustered_df = all_features_df.copy()

    # Columns that are identifiers or outputs of this function, not features.
    non_feature_columns = {'cell_id_unique', 'sample_id', 'original_cell_label',
                           'umap_x', 'umap_y', 'cluster', 'cell_type'}

    feature_columns = []
    for col in clustered_df.columns:
        if col in non_feature_columns:
            continue
        if pd.api.types.is_numeric_dtype(clustered_df[col]):
            feature_columns.append(col)
            continue
        try:
            clustered_df[col] = pd.to_numeric(clustered_df[col])
        except (ValueError, TypeError):
            # TypeError covers non-scalar cell values such as lists.
            print(f"Warning: Column {col} is not numeric and could not be converted. Excluded from clustering.")
        else:
            feature_columns.append(col)

    if not feature_columns:
        print("No valid numeric feature columns found for clustering.")
        return all_features_df

    features_for_clustering_df = clustered_df[feature_columns].copy()
    # Fill gaps with the column mean; a column that is entirely NaN has no
    # mean, so it is filled with 0 to keep the scaler input finite.
    features_for_clustering_df = features_for_clustering_df.fillna(features_for_clustering_df.mean())
    features_for_clustering_df = features_for_clustering_df.fillna(0)

    # Log Transform Area-based Features (before scaling)
    area_features_to_log = ['cell_area', 'avg_nucleus_area', 'total_nuclear_area', 'max_nucleus_area', 'cell_perimeter']
    print("\nApplying log transformation to selected area/perimeter features for clustering...")
    for col in area_features_to_log:
        if col in features_for_clustering_df.columns:
            features_for_clustering_df[col] = np.log1p(features_for_clustering_df[col])  # log1p handles zeros

    # Standardize Features
    print("\nStandardizing features for clustering...")
    scaler = StandardScaler()
    features_standardized = scaler.fit_transform(features_for_clustering_df)

    # UMAP Dimensionality Reduction
    print("\nPerforming UMAP reduction...")
    # n_neighbors must be smaller than the number of samples.
    actual_umap_n_neighbors = min(umap_n_neighbors, features_standardized.shape[0] - 1)
    if actual_umap_n_neighbors < 2:  # UMAP requires at least 2 neighbors
        print(f"Warning: Not enough samples ({features_standardized.shape[0]}) for UMAP with n_neighbors={umap_n_neighbors}. Skipping UMAP and KMeans.")
        clustered_df['umap_x'] = np.nan
        clustered_df['umap_y'] = np.nan
        clustered_df['cluster'] = np.nan
        clustered_df['cell_type'] = 'Unknown'  # Default if clustering fails
        return clustered_df

    reducer = umap.UMAP(n_neighbors=actual_umap_n_neighbors, min_dist=umap_min_dist, random_state=umap_random_state, n_components=2)
    embedding = reducer.fit_transform(features_standardized)

    clustered_df['umap_x'] = embedding[:, 0]
    clustered_df['umap_y'] = embedding[:, 1]

    # k-Means Clustering (k=2 for Senescent/Non-senescent), run on the UMAP embedding.
    print("\nPerforming k-Means clustering (k=2)...")
    if embedding.shape[0] >= 2:  # KMeans needs at least n_clusters samples
        kmeans = KMeans(n_clusters=2, random_state=kmeans_random_state, n_init='auto')
        clustered_df['cluster'] = kmeans.fit_predict(embedding)
    else:
        print("Warning: Not enough samples for KMeans clustering. Assigning NaN to clusters.")
        clustered_df['cluster'] = np.nan

    # Identify Senescent Cluster. Heuristic: the cluster with the larger mean
    # cell_area is called senescent; this should be validated on real data.
    print("\nIdentifying senescent cluster (heuristic based on UMAP and feature means)...")
    if 'cluster' in clustered_df.columns and clustered_df['cluster'].notna().any():
        try:
            # NOTE: if either column is missing this raises KeyError, which the
            # handler below turns into cell_type 'Unknown'.
            cluster_means = clustered_df.groupby('cluster')[['cell_area', 'avg_nucleus_area']].mean()
            if not cluster_means.empty and 'cell_area' in cluster_means.columns:
                senescent_cluster_label = cluster_means['cell_area'].idxmax()
                print(f"Cluster {senescent_cluster_label} tentatively identified as 'Senescent' based on max mean cell_area.")
            else:
                senescent_cluster_label = 0  # Default to 0
                print("Warning: Could not determine senescent cluster based on cell_area. Defaulting.")

            clustered_df['cell_type'] = clustered_df['cluster'].apply(
                lambda x: 'Senescent' if x == senescent_cluster_label else 'Non-senescent'
            )
            # Multinucleation rule: cells with more than one nucleus are
            # labeled senescent regardless of their cluster.
            if 'nuclei_count' in clustered_df.columns:
                print("Applying multinucleation rule (nuclei_count > 1)...")
                clustered_df.loc[clustered_df['nuclei_count'] > 1, 'cell_type'] = 'Senescent'
            else:
                print("Warning: 'nuclei_count' not in DataFrame. Multinucleation rule not applied.")
        except Exception as e_clust_id:
            print(f"Error identifying senescent cluster: {e_clust_id}. Defaulting cell_type.")
            clustered_df['cell_type'] = 'Unknown'
    else:
        print("Clustering did not produce valid cluster labels. Setting cell_type to 'Unknown'.")
        clustered_df['cell_type'] = 'Unknown'

    print("\nClustering process complete.")
    return clustered_df

def visualize_clustering_results(clustered_df, output_dir_main):
    """
    Save a UMAP scatter plot and per-type feature boxplots for the clustering results.

    Args:
        clustered_df: DataFrame with 'umap_x' / 'umap_y' and, optionally,
            'cell_type', 'cluster' and the feature columns to plot.
        output_dir_main: Directory the PNG files are written to; it is created
            if it does not exist.

    Writes 'umap_final_cell_type.png' and, when 'cell_type' and at least one
    key feature are available, 'feature_distributions_by_type.png'.
    Returns None; nothing is written when the UMAP data is missing or empty.
    """
    print("\n--- Creating Visualizations ---")
    if clustered_df.empty or 'umap_x' not in clustered_df.columns or clustered_df['umap_x'].isnull().all():
        print("Skipping visualizations as UMAP data is missing or empty.")
        return

    # Make sure the target directory exists before any savefig call.
    os.makedirs(output_dir_main, exist_ok=True)

    cell_type_palette = {'Senescent': 'red', 'Non-senescent': 'blue', 'Unknown': 'grey'}
    has_cell_type = 'cell_type' in clustered_df.columns and clustered_df['cell_type'].notna().any()
    has_cluster = 'cluster' in clustered_df.columns and clustered_df['cluster'].notna().any()

    # UMAP plot colored by final cell_type, falling back to cluster, then to no color.
    plt.figure(figsize=(10, 8))
    legend_title = None  # Set only when the points are actually colored by a column
    if has_cell_type:
        sns.scatterplot(
            x='umap_x', y='umap_y',
            hue='cell_type',
            palette=cell_type_palette,
            data=clustered_df,
            s=30, alpha=0.7, edgecolor='k', linewidth=0.5
        )
        plt.title('UMAP Projection by Final Cell Type', fontsize=14)
        legend_title = 'Cell Type'
    elif has_cluster:
        sns.scatterplot(x='umap_x', y='umap_y', hue='cluster', data=clustered_df, palette='viridis', s=30, alpha=0.7)
        plt.title('UMAP Projection by KMeans Cluster', fontsize=14)
        legend_title = 'Cluster'
    else:
        plt.scatter(clustered_df['umap_x'], clustered_df['umap_y'], s=30, alpha=0.5)
        plt.title('UMAP Projection (No Color)', fontsize=14)

    plt.xlabel('UMAP Dimension 1', fontsize=12)
    plt.ylabel('UMAP Dimension 2', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.7)
    if legend_title is not None:
        # The title follows the column used for coloring; the uncolored
        # fallback has no labeled artists, so no legend is drawn for it.
        plt.legend(title=legend_title)
    plt.savefig(os.path.join(output_dir_main, 'umap_final_cell_type.png'), dpi=300, bbox_inches='tight')
    plt.close()
    print("UMAP visualization saved.")

    # Key feature distributions, one boxplot per feature, two per row.
    key_features_for_plot = ['cell_area', 'avg_nucleus_area', 'nuclei_count', 'cell_circularity', 'nucleus_to_cell_area_ratio']
    key_features_present = [f for f in key_features_for_plot if f in clustered_df.columns and clustered_df[f].notna().any()]

    if key_features_present and has_cell_type:
        num_feats = len(key_features_present)
        num_rows = (num_feats + 1) // 2
        fig, axes = plt.subplots(num_rows, 2, figsize=(12, 3 * num_rows), squeeze=False)
        axes = axes.flatten()
        for i, feature in enumerate(key_features_present):
            sns.boxplot(x='cell_type', y=feature, data=clustered_df, ax=axes[i], palette=cell_type_palette)
            axes[i].set_title(feature.replace('_', ' ').title(), fontsize=10)
            axes[i].set_xlabel('')
            axes[i].set_ylabel('')

        # Hide any unused subplots (odd number of features).
        for j in range(num_feats, len(axes)):
            fig.delaxes(axes[j])

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir_main, 'feature_distributions_by_type.png'), dpi=300, bbox_inches='tight')
        plt.close()
        print("Feature distribution boxplots saved.")


def save_results(clustered_df, output_directory):
    """
    Write the per-cell results, a per-sample summary and a summary bar plot.

    Always written (unless the DataFrame is empty):
      - 'cell_classification_full_results.csv': the DataFrame without its index.
    Written only when both 'sample_id' and 'cell_type' columns exist:
      - 'sample_summary_by_type.csv': per-sample counts of each cell type,
        'total_cells' and the percentage of each type;
      - 'senescent_percentage_by_sample.png': bar plot of 'percent_senescent'.

    The output directory is created when it does not exist. Returns None.
    """
    if clustered_df.empty:
        print("No data to save.")
        return

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    # Full per-cell table.
    full_results_path = os.path.join(output_directory, 'cell_classification_full_results.csv')
    clustered_df.to_csv(full_results_path, index=False)
    print(f"Full cell classification results saved to: {full_results_path}")

    # The per-sample summary needs both grouping columns.
    available_columns = clustered_df.columns
    if 'sample_id' not in available_columns or 'cell_type' not in available_columns:
        return

    counts_per_sample = clustered_df.groupby('sample_id')['cell_type'].value_counts(normalize=False)
    sample_summary = counts_per_sample.unstack(fill_value=0)

    # Guarantee a column for every cell type, even if it never occurred.
    for type_label in ('Senescent', 'Non-senescent', 'Unknown'):
        if type_label not in sample_summary.columns:
            sample_summary[type_label] = 0

    sample_summary['total_cells'] = sample_summary.sum(axis=1)

    percent_columns = (
        ('percent_senescent', 'Senescent'),
        ('percent_non_senescent', 'Non-senescent'),
        ('percent_unknown', 'Unknown'),
    )
    has_counted_cells = 'total_cells' in sample_summary.columns and (sample_summary['total_cells'] > 0).any()
    for percent_name, type_label in percent_columns:
        if has_counted_cells:
            sample_summary[percent_name] = sample_summary[type_label] / sample_summary['total_cells'] * 100
        else:
            sample_summary[percent_name] = 0

    summary_path = os.path.join(output_directory, 'sample_summary_by_type.csv')
    sample_summary.to_csv(summary_path)
    print(f"Sample summary saved to: {summary_path}")

    # Bar plot of the senescent percentage for each sample.
    if 'percent_senescent' not in sample_summary.columns:
        return

    plt.figure(figsize=(12, 6))
    sns.barplot(x=sample_summary.index, y='percent_senescent', data=sample_summary.reset_index(), color='red')
    plt.title('Percentage of Senescent Cells by Sample')
    plt.xlabel('Sample ID')
    plt.ylabel('Senescent Cells (%)')
    plt.xticks(rotation=90, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_directory, 'senescent_percentage_by_sample.png'), dpi=300)
    plt.close()
    print("Senescent percentage bar plot saved.")


def main(base_dir, cell_subdir, nuclei_subdir, output_parent_dir):
    """
    Run the senescence analysis end-to-end for one dataset folder (e.g. Static-x20).

    Pairs cell and nuclei mask files, extracts per-cell features from every
    pair, pools them into one DataFrame, then hands that DataFrame to the
    clustering, saving and visualization steps.

    Args:
        base_dir: Dataset folder that contains the mask subdirectories.
        cell_subdir: Name of the cell-mask subdirectory inside ``base_dir``.
        nuclei_subdir: Name of the nuclei-mask subdirectory inside ``base_dir``.
        output_parent_dir: Parent folder for analysis output. Results are
            written to ``<output_parent_dir>/<dataset name>/Senescence_Results``.

    Returns:
        None. Returns early (after printing a message) when no file pairs are
        found or when no cell features could be extracted.
    """
    import traceback

    print(f"=== Senescent Cell Classification Analysis for {base_dir} ===")

    cell_mask_dir = os.path.join(base_dir, cell_subdir)
    nuclei_dir = os.path.join(base_dir, nuclei_subdir)

    # Construct a unique output directory for this run
    # e.g. /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results
    # normpath strips a trailing separator; without it basename("…/Static-x20/")
    # is '' and the results of different datasets would share one folder.
    dataset_name = os.path.basename(os.path.normpath(base_dir))  # Should be "Static-x20"
    output_dir_main = os.path.join(output_parent_dir, dataset_name, "Senescence_Results")

    if not os.path.isdir(output_dir_main):
        # exist_ok avoids a crash if the directory appears between check and creation.
        os.makedirs(output_dir_main, exist_ok=True)
        print(f"Created output directory: {output_dir_main}")

    # Find and pair mask files
    file_pairs = find_mask_files(cell_mask_dir, nuclei_dir)
    if not file_pairs:
        print("No matching file pairs found. Exiting analysis.")
        return

    all_cell_features_list = []

    for file_pair in tqdm(file_pairs, desc="Processing image pairs"):
        sample_id = file_pair['sample_id']
        print(f"\nProcessing sample: {sample_id}")
        # A failure in one image pair is reported and skipped so the remaining
        # samples are still processed.
        try:
            cell_mask = load_mask_image(file_pair['cell_file'])
            nuclei_mask = load_mask_image(file_pair['nuclei_file'])

            if cell_mask is None or nuclei_mask is None:
                print(f"Skipping {sample_id} due to mask loading error.")
                continue

            # Track nuclei in cells.
            # Result dict: 'cell_data' (list of per-cell dicts), 'nuclei_data'
            # (list of per-nucleus dicts), 'cell_nuclei_mapping' (cell label ->
            # list of nucleus labels).
            tracking_results = accurately_track_nuclei_in_cells(cell_mask, nuclei_mask)

            # Extract features for each cell in the current image pair
            for single_cell_data in tracking_results['cell_data']:
                # 'cell_id' is the cell's original mask label, the same key
                # used in 'cell_nuclei_mapping'.
                cell_original_label = single_cell_data['cell_id']

                features = extract_features_for_cell(
                    single_cell_data,
                    tracking_results['cell_nuclei_mapping'],  # Pass the whole map
                    tracking_results['nuclei_data']           # All nuclei of this image
                )

                # Mask labels repeat across images, so prefix with the sample ID
                # to obtain an identifier that is unique across the dataset.
                features['cell_id_unique'] = f"{sample_id}_{cell_original_label}"
                features['sample_id'] = sample_id
                features['original_cell_label'] = cell_original_label  # Keep track of original mask label

                all_cell_features_list.append(features)

        except Exception as e:
            print(f"Error processing {sample_id}: {str(e)}")
            traceback.print_exc()

    if not all_cell_features_list:
        print("No cell features were extracted. Please check input files and processing steps.")
        return

    all_features_df = pd.DataFrame(all_cell_features_list)
    print(f"\nTotal cells extracted across all samples: {len(all_features_df)}")

    # Save raw features before clustering (optional, for debugging)
    # all_features_df.to_csv(os.path.join(output_dir_main, "raw_features_extracted.csv"), index=False)

    # Perform clustering and classification
    clustered_df = perform_clustering(all_features_df)

    # Save results (CSV and summary plots)
    save_results(clustered_df, output_dir_main)

    # Visualize clustering results (UMAP plots, feature distributions)
    visualize_clustering_results(clustered_df, output_dir_main)

    print("\nAnalysis complete!")


if __name__ == "__main__":
    # Input layout:
    #   <segmented_data_parent_dir>/<current_dataset_folder_name>/<mask subdirectory>
    # e.g. /content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-x20
    # ("Static-x20" is assumed to sit next to "flow3-x20".)
    segmented_data_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented"
    current_dataset_folder_name = "Static-x20"
    base_input_dir = os.path.join(segmented_data_parent_dir, current_dataset_folder_name)

    # Mask subdirectories inside the dataset folder (assumed identical for every dataset).
    cell_masks_subdir = "Cell_merged_conservative"
    nuclei_masks_subdir = "Nuclei"

    # Every dataset's analysis output is collected under this folder.
    analysis_output_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis"

    # Only start the analysis when the dataset folder is actually there.
    if os.path.isdir(base_input_dir):
        main(base_input_dir, cell_masks_subdir, nuclei_masks_subdir, analysis_output_parent_dir)
    else:
        print(f"Error: Base input directory not found: {base_input_dir}")
        print("Please ensure the 'Static-x20' folder exists at the specified location.")



=== Senescent Cell Classification Analysis for /content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-x20 ===
Created output directory: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results

--- Finding and Pairing Mask Files ---
Found 23 cell mask files and 23 nuclei mask files
Total matching cell-nuclei file pairs found: 23


Processing image pairs:   0%|          | 0/23 [00:00<?, ?it/s]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001
Found 304 potential cell objects and 318 potential nuclei objects.


Processing image pairs:   4%|▍         | 1/23 [00:02<01:00,  2.76s/it]

Cells with at least one nucleus: 304/304 (100.0% of cells)
  Cells with 1 nuclei: 291 (95.7%)
  Cells with 2 nuclei: 13 (4.3%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002
Found 234 potential cell objects and 252 potential nuclei objects.


Processing image pairs:   9%|▊         | 2/23 [00:05<01:03,  3.03s/it]

Cells with at least one nucleus: 234/234 (100.0% of cells)
  Cells with 1 nuclei: 222 (94.9%)
  Cells with 2 nuclei: 10 (4.3%)
  Cells with 3 nuclei: 2 (0.9%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003
Found 310 potential cell objects and 332 potential nuclei objects.


Processing image pairs:  13%|█▎        | 3/23 [00:09<01:00,  3.05s/it]

Cells with at least one nucleus: 310/310 (100.0% of cells)
  Cells with 1 nuclei: 294 (94.8%)
  Cells with 2 nuclei: 15 (4.8%)
  Cells with 3 nuclei: 1 (0.3%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004
Found 303 potential cell objects and 316 potential nuclei objects.


Processing image pairs:  17%|█▋        | 4/23 [00:12<00:58,  3.05s/it]

Cells with at least one nucleus: 303/303 (100.0% of cells)
  Cells with 1 nuclei: 293 (96.7%)
  Cells with 2 nuclei: 10 (3.3%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005
Found 236 potential cell objects and 253 potential nuclei objects.


Processing image pairs:  22%|██▏       | 5/23 [00:14<00:51,  2.85s/it]

Cells with at least one nucleus: 236/236 (100.0% of cells)
  Cells with 1 nuclei: 225 (95.3%)
  Cells with 2 nuclei: 10 (4.2%)
  Cells with 3 nuclei: 1 (0.4%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006
Found 262 potential cell objects and 271 potential nuclei objects.


Processing image pairs:  26%|██▌       | 6/23 [00:17<00:45,  2.70s/it]

Cells with at least one nucleus: 262/262 (100.0% of cells)
  Cells with 1 nuclei: 256 (97.7%)
  Cells with 2 nuclei: 6 (2.3%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007
Found 214 potential cell objects and 233 potential nuclei objects.


Processing image pairs:  30%|███       | 7/23 [00:19<00:43,  2.72s/it]

Cells with at least one nucleus: 214/214 (100.0% of cells)
  Cells with 1 nuclei: 203 (94.9%)
  Cells with 2 nuclei: 10 (4.7%)
  Cells with 3 nuclei: 1 (0.5%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008
Found 234 potential cell objects and 254 potential nuclei objects.


Processing image pairs:  35%|███▍      | 8/23 [00:22<00:40,  2.70s/it]

Cells with at least one nucleus: 234/234 (100.0% of cells)
  Cells with 1 nuclei: 220 (94.0%)
  Cells with 2 nuclei: 14 (6.0%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009
Found 283 potential cell objects and 296 potential nuclei objects.


Processing image pairs:  39%|███▉      | 9/23 [00:24<00:37,  2.65s/it]

Cells with at least one nucleus: 283/283 (100.0% of cells)
  Cells with 1 nuclei: 276 (97.5%)
  Cells with 2 nuclei: 7 (2.5%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010
Found 210 potential cell objects and 225 potential nuclei objects.


Processing image pairs:  43%|████▎     | 10/23 [00:27<00:33,  2.57s/it]

Cells with at least one nucleus: 210/210 (100.0% of cells)
  Cells with 1 nuclei: 199 (94.8%)
  Cells with 2 nuclei: 11 (5.2%)

Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011
Found 261 potential cell objects and 278 potential nuclei objects.


Processing image pairs:  48%|████▊     | 11/23 [00:30<00:31,  2.59s/it]

Cells with at least one nucleus: 261/261 (100.0% of cells)
  Cells with 1 nuclei: 251 (96.2%)
  Cells with 2 nuclei: 10 (3.8%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001
Found 217 potential cell objects and 241 potential nuclei objects.


Processing image pairs:  52%|█████▏    | 12/23 [00:32<00:29,  2.64s/it]

Cells with at least one nucleus: 217/217 (100.0% of cells)
  Cells with 1 nuclei: 204 (94.0%)
  Cells with 2 nuclei: 11 (5.1%)
  Cells with 3 nuclei: 2 (0.9%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002
Found 287 potential cell objects and 315 potential nuclei objects.


Processing image pairs:  57%|█████▋    | 13/23 [00:35<00:27,  2.74s/it]

Cells with at least one nucleus: 287/287 (100.0% of cells)
  Cells with 1 nuclei: 266 (92.7%)
  Cells with 2 nuclei: 19 (6.6%)
  Cells with 3 nuclei: 2 (0.7%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003
Found 284 potential cell objects and 313 potential nuclei objects.


Processing image pairs:  61%|██████    | 14/23 [00:38<00:25,  2.86s/it]

Cells with at least one nucleus: 284/284 (100.0% of cells)
  Cells with 1 nuclei: 262 (92.3%)
  Cells with 2 nuclei: 21 (7.4%)
  Cells with 3 nuclei: 1 (0.4%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004
Found 222 potential cell objects and 280 potential nuclei objects.


Processing image pairs:  65%|██████▌   | 15/23 [00:41<00:22,  2.81s/it]

Cells with at least one nucleus: 222/222 (100.0% of cells)
  Cells with 1 nuclei: 179 (80.6%)
  Cells with 2 nuclei: 34 (15.3%)
  Cells with 3 nuclei: 7 (3.2%)
  Cells with 4 nuclei: 1 (0.5%)
  Cells with 5 nuclei: 1 (0.5%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005
Found 196 potential cell objects and 238 potential nuclei objects.


Processing image pairs:  70%|██████▉   | 16/23 [00:44<00:18,  2.70s/it]

Cells with at least one nucleus: 196/196 (100.0% of cells)
  Cells with 1 nuclei: 170 (86.7%)
  Cells with 2 nuclei: 20 (10.2%)
  Cells with 3 nuclei: 5 (2.6%)
  Cells with 4 nuclei: 1 (0.5%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006
Found 273 potential cell objects and 332 potential nuclei objects.


Processing image pairs:  74%|███████▍  | 17/23 [00:47<00:17,  2.88s/it]

Cells with at least one nucleus: 273/273 (100.0% of cells)
  Cells with 1 nuclei: 231 (84.6%)
  Cells with 2 nuclei: 34 (12.5%)
  Cells with 3 nuclei: 7 (2.6%)
  Cells with 4 nuclei: 1 (0.4%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007
Found 223 potential cell objects and 270 potential nuclei objects.


Processing image pairs:  78%|███████▊  | 18/23 [00:49<00:13,  2.75s/it]

Cells with at least one nucleus: 223/223 (100.0% of cells)
  Cells with 1 nuclei: 188 (84.3%)
  Cells with 2 nuclei: 30 (13.5%)
  Cells with 3 nuclei: 5 (2.2%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008
Found 268 potential cell objects and 306 potential nuclei objects.


Processing image pairs:  83%|████████▎ | 19/23 [00:52<00:11,  2.76s/it]

Cells with at least one nucleus: 268/268 (100.0% of cells)
  Cells with 1 nuclei: 235 (87.7%)
  Cells with 2 nuclei: 32 (11.9%)
  Cells with 3 nuclei: 1 (0.4%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009
Found 304 potential cell objects and 318 potential nuclei objects.


Processing image pairs:  87%|████████▋ | 20/23 [00:55<00:08,  2.76s/it]

Cells with at least one nucleus: 304/304 (100.0% of cells)
  Cells with 1 nuclei: 299 (98.4%)
  Cells with 2 nuclei: 5 (1.6%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010
Found 183 potential cell objects and 190 potential nuclei objects.


Processing image pairs:  91%|█████████▏| 21/23 [00:57<00:05,  2.59s/it]

Cells with at least one nucleus: 183/183 (100.0% of cells)
  Cells with 1 nuclei: 180 (98.4%)
  Cells with 2 nuclei: 3 (1.6%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011
Found 216 potential cell objects and 230 potential nuclei objects.


Processing image pairs:  96%|█████████▌| 22/23 [01:00<00:02,  2.67s/it]

Cells with at least one nucleus: 216/216 (100.0% of cells)
  Cells with 1 nuclei: 207 (95.8%)
  Cells with 2 nuclei: 9 (4.2%)

Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012
Found 215 potential cell objects and 232 potential nuclei objects.


Processing image pairs: 100%|██████████| 23/23 [01:02<00:00,  2.74s/it]

Cells with at least one nucleus: 215/215 (100.0% of cells)
  Cells with 1 nuclei: 202 (94.0%)
  Cells with 2 nuclei: 12 (5.6%)
  Cells with 3 nuclei: 1 (0.5%)

Total cells extracted across all samples: 5739
Starting clustering process...

Applying log transformation to selected area/perimeter features for clustering...

Standardizing features for clustering...

Performing UMAP reduction...



  warn(



Performing k-Means clustering (k=2)...

Identifying senescent cluster (heuristic based on UMAP and feature means)...
Cluster 1 tentatively identified as 'Senescent' based on max mean cell_area.
Applying multinucleation rule (nuclei_count > 1)...

Clustering process complete.
Full cell classification results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/cell_classification_full_results.csv
Sample summary saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/sample_summary_by_type.csv
Senescent percentage bar plot saved.

--- Creating Visualizations ---
UMAP visualization saved.



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='cell_type', y=feature, data=clustered_df, ax=axes[i], palette={'Senescent': 'red', 'Non-senescent': 'blue', 'Unknown': 'grey'})

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='cell_type', y=feature, data=clustered_df, ax=axes[i], palette={'Senescent': 'red', 'Non-senescent': 'blue', 'Unknown': 'grey'})

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='cell_type', y=feature, data=clustered_df, ax=axes[i], palette={'Senescent': 'red', 'Non-senescent': 'blue', 'Unknown': 'grey'})

Passing `palette` without assigning `hue` is deprecated and wil

Feature distribution boxplots saved.

Analysis complete!


In [4]:
import os
import numpy as np
import pandas as pd
import cv2
import re
from scipy import ndimage
from skimage import io, measure, segmentation # Added segmentation
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
# KMeans is no longer used for final classification but can be kept for exploratory UMAP clustering
from sklearn.cluster import KMeans
import umap
from tqdm import tqdm

# --- Configuration & Parameters ---

# Columns fed into the UMAP embedding.
FEATURES_FOR_UMAP = [
    # whole-cell morphology
    'cell_area',
    'cell_perimeter',
    'cell_eccentricity',
    'cell_circularity',
    'cell_aspect_ratio',
    # nuclear morphology
    'avg_nucleus_area',
    'max_nucleus_area',
    'avg_nucleus_eccentricity',
    'nucleus_area_std',
    'nucleus_displacement_avg',      # formerly 'nucleus_displacement'
    'nucleus_to_cell_area_ratio',
    # size relative to a reference cell / nucleus
    'nuclear_enlargement_factor',    # formerly 'nuclear_enlargement'
    'cell_enlargement_factor',       # formerly 'cell_enlargement'
]

# Size-like columns that get a log1p transform before UMAP and before the
# senescence score is computed.
AREA_FEATURES_TO_LOG = [
    'cell_area',
    'avg_nucleus_area',
    'total_nuclear_area',
    'max_nucleus_area',
    'cell_perimeter',
]

# Per-feature weights of the senescence score (applied to standardized values).
# A negative weight means a larger value lowers the score. The last three keys
# use the feature names produced by extract_features_for_cell.
SENESCENCE_SCORE_WEIGHTS = dict(
    cell_area=1.5,
    cell_perimeter=0.5,
    cell_eccentricity=0.5,
    cell_circularity=-1.0,
    cell_aspect_ratio=0.5,
    avg_nucleus_area=1.0,
    avg_nucleus_eccentricity=0.3,
    nucleus_to_cell_area_ratio=-1.0,  # original script used -1.5; -1.0 comes from the user's second script example
    nuclear_enlargement_factor=1.0,
    cell_enlargement_factor=1.5,
    nucleus_displacement_avg=0.2,
)

# User-defined rule-based gates. Each gate has a name, a list of
# (feature, operator, threshold) conditions and the label it assigns.
RULE_BASED_GATES = [
    {
        'name': 'Polynucleated',
        'conditions': [('nuclei_count', '>', 1)],
        'output_label': 'Rule_Sen_Poly',
    },
    {
        'name': 'Very_Large_Cell',
        'conditions': [('cell_area', '>', 5000)],
        'output_label': 'Rule_Sen_VeryLarge',
    },
    {
        'name': 'Low_Circularity',
        'conditions': [('cell_circularity', '<', 0.2)],
        'output_label': 'Rule_Sen_LowCirc',
    },
    {
        'name': 'Low_NucToCellRatio',
        'conditions': [('nucleus_to_cell_area_ratio', '<', 0.1)],
        'output_label': 'Rule_Sen_LowNucRatio',
    },
    {
        # Requires the 'senescence_score_normalized' column to exist.
        'name': 'High_Score_Not_Otherwise_Caught',
        'conditions': [('senescence_score_normalized', '>', 0.75)],
        'output_label': 'Rule_Sen_HighScore',
    },
]

# Label given to cells that no gate captures.
RULE_BASED_DEFAULT_LABEL = 'Rule_NonSenescent'


# Function to extract sample ID from new filename structure
def extract_sample_id(filename):
    """
    Derive the sample ID from a Static-x20 style mask filename.

    The extension and an optional leading ``denoised_`` are dropped; the ID is
    then everything up to and including the first ``_seq<digits>`` token.
    If that pattern is absent, the name is cut after the first
    underscore-separated token (other than the very first) that starts with
    ``seq``. As a last resort a warning is printed and the first seven
    underscore-separated tokens are returned.
    """
    stem, _extension = os.path.splitext(filename)

    prefix = 'denoised_'
    if stem.startswith(prefix):
        stem = stem[len(prefix):]

    # Preferred form: "<anything>_seq<digits>".
    found = re.search(r'(.+?_seq\d+)', stem)
    if found is not None:
        return found.group(1)

    # Looser form: a token that merely starts with "seq" (not in first position).
    tokens = stem.split('_')
    seq_position = next(
        (idx for idx, token in enumerate(tokens) if idx > 0 and token.startswith('seq')),
        None,
    )
    if seq_position is not None:
        return '_'.join(tokens[:seq_position + 1])

    print(f"Warning: Could not robustly extract sample ID from '{filename}'. Using fallback.")
    return '_'.join(tokens[:7])

def find_mask_files(cell_dir, nuclei_dir):
    """Find cell and nuclei mask files and pair them by extracted sample ID.

    Only non-hidden files ending in ``.tif`` / ``.tiff`` are considered.
    File names are processed in sorted order so the returned pairs (and every
    result derived from their order) are reproducible; ``os.listdir`` itself
    returns entries in arbitrary order.

    Args:
        cell_dir: Directory containing the cell mask files.
        nuclei_dir: Directory containing the nuclei mask files.

    Returns:
        A list of dicts with keys ``'cell_file'`` and ``'nuclei_file'`` (full
        paths) and ``'sample_id'``, one per cell file that has a nuclei file
        with the same sample ID. Empty when nothing matches.
    """
    print("\n--- Finding and Pairing Mask Files ---")

    def _list_mask_files(directory):
        # Sorted for a deterministic processing order.
        return sorted(
            f for f in os.listdir(directory)
            if f.endswith(('.tif', '.tiff')) and not f.startswith('.')
        )

    cell_files = _list_mask_files(cell_dir)
    nuclei_files = _list_mask_files(nuclei_dir)
    print(f"Found {len(cell_files)} cell mask files and {len(nuclei_files)} nuclei mask files")

    # sample ID -> nuclei file name
    nuclei_lookup = {}
    for nuclei_file in nuclei_files:
        sample_id = extract_sample_id(nuclei_file)
        if not sample_id:
            print(f"Warning: Could not extract sample ID for nuclei file: {nuclei_file}")
            continue
        if sample_id in nuclei_lookup:
            # The later file still wins (as before), but no longer silently.
            print(f"Warning: Multiple nuclei files map to sample ID '{sample_id}': "
                  f"using '{nuclei_file}' instead of '{nuclei_lookup[sample_id]}'.")
        nuclei_lookup[sample_id] = nuclei_file

    file_pairs = []
    for cell_file in cell_files:
        sample_id = extract_sample_id(cell_file)
        if sample_id and sample_id in nuclei_lookup:
            file_pairs.append({
                'cell_file': os.path.join(cell_dir, cell_file),
                'nuclei_file': os.path.join(nuclei_dir, nuclei_lookup[sample_id]),
                'sample_id': sample_id
            })
        elif sample_id:
            print(f"Warning: No matching nuclei file for cell sample ID: {sample_id} (from {cell_file})")
        else:
            print(f"Warning: Could not extract sample ID for cell file: {cell_file}")

    pairs_found = len(file_pairs)
    print(f"Total matching cell-nuclei file pairs found: {pairs_found}")
    if pairs_found == 0 and (cell_files or nuclei_files):
        print("CRITICAL WARNING: No file pairs matched. Check `extract_sample_id` and filenames.")
    return file_pairs

def load_mask_image(filepath):
    """Load a mask image and return it as an integer label image.

    Interpretation of the file contents:
      * integer image with a maximum above 1: treated as already labelled
        and returned with its values unchanged;
      * boolean image, or integer image with maximum <= 1: treated as a binary
        mask, connected components are labelled;
      * floating-point image: thresholded at 0.5, then labelled;
      * any other dtype: thresholded at its median, then labelled (a warning
        is printed).

    Images with more than two dimensions are reduced to their first channel
    (last axis, index 0) *before* the steps above, so labelling is done on the
    plane that is actually returned.

    Args:
        filepath: Path of the image file to read.

    Returns:
        The label image as ``uint16``, or as ``int32`` when a label exceeds the
        ``uint16`` range (so large label values are not wrapped around).
        ``None`` if the file cannot be read or converted; the error is printed.
    """
    try:
        img = io.imread(filepath)

        # Select the channel first: labelling the full stack would connect
        # components through the channel axis and label planes that are
        # discarded afterwards.
        if img.ndim > 2:
            print(f"Warning: Multi-channel image {filepath} ({img.shape}). Taking first channel.")
            img = img[..., 0]

        dtype_kind = img.dtype.kind
        if dtype_kind in 'iu' and np.max(img) > 1:
            # Already a label image.
            labeled = img
        elif dtype_kind in 'biu':
            # Binary mask stored as bool or as any integer type (not only uint8).
            labeled = ndimage.label(img > 0)[0]
        elif dtype_kind == 'f':
            labeled = ndimage.label(img > 0.5)[0]
        else:
            print(f"Warning: Unhandled img type {img.dtype} for {filepath}. Attempting binary labeling.")
            try:
                labeled = ndimage.label(img > np.median(img))[0]
            except Exception as e_conv:
                print(f"Error converting/labeling {filepath}: {e_conv}")
                return None

        # uint16 is kept as the usual output type; fall back to int32 only
        # when a label would otherwise overflow.
        fits_uint16 = np.max(labeled) <= np.iinfo(np.uint16).max
        return labeled.astype(np.uint16 if fits_uint16 else np.int32)
    except Exception as e:
        print(f"Error loading image {filepath}: {e}")
        return None

def accurately_track_nuclei_in_cells(cell_mask, nuclei_mask):
    """Assign each nucleus to the cell that contains most of it.

    A nucleus is assigned to the cell label covering the largest share of its
    pixels, provided that share is strictly greater than 50%. Nuclei that do
    not meet this are left out of the result.

    Args:
        cell_mask: 2-D integer label image of cells (0 = background).
        nuclei_mask: 2-D integer label image of nuclei (0 = background), with
            the same shape as ``cell_mask``.

    Returns:
        dict with three entries:
          * ``'cell_data'``: one dict per cell region (label as ``'cell_id'``,
            area, perimeter, eccentricity, orientation in degrees, axis
            lengths, centroid, ``'nuclei_count'``, ``'nuclei_ids_in_cell'``);
          * ``'nuclei_data'``: one dict per assigned nucleus (label, owning
            ``'cell_id'``, area, eccentricity, centroid, ``'overlap_ratio'``);
          * ``'cell_nuclei_mapping'``: cell label -> list of nucleus labels.
        All three are empty when either mask contains no labelled object.

    Raises:
        ValueError: If both masks contain objects but their shapes differ.
    """
    labeled_cells, num_cells = cell_mask, np.max(cell_mask)
    labeled_nuclei, num_nuclei = nuclei_mask, np.max(nuclei_mask)
    print(f"Found {num_cells} cell objects, {num_nuclei} nuclei objects.")

    results = {'cell_data': [], 'nuclei_data': [], 'cell_nuclei_mapping': {}}
    if num_cells == 0 or num_nuclei == 0:
        return results

    if labeled_cells.shape != labeled_nuclei.shape:
        raise ValueError(
            f"Cell mask shape {labeled_cells.shape} does not match "
            f"nuclei mask shape {labeled_nuclei.shape}."
        )

    # cell label -> its record in results['cell_data'], for O(1) updates.
    cell_records = {}
    for cell_region in measure.regionprops(labeled_cells):
        cell_id = cell_region.label
        record = {
            'cell_id': cell_id, 'area': cell_region.area, 'perimeter': cell_region.perimeter,
            'eccentricity': getattr(cell_region, 'eccentricity', 0),
            'orientation': np.degrees(getattr(cell_region, 'orientation', 0)),
            'major_axis_length': getattr(cell_region, 'major_axis_length', 0),
            'minor_axis_length': getattr(cell_region, 'minor_axis_length', 0),
            'centroid_y': cell_region.centroid[0], 'centroid_x': cell_region.centroid[1],
            'nuclei_count': 0, 'nuclei_ids_in_cell': []
        }
        results['cell_data'].append(record)
        results['cell_nuclei_mapping'][cell_id] = []
        cell_records[cell_id] = record

    for nucleus_region in measure.regionprops(labeled_nuclei):
        nucleus_id, nucleus_area = nucleus_region.label, nucleus_region.area
        if nucleus_area <= 0:
            continue

        # Work only inside the nucleus bounding box instead of building
        # full-image masks for the nucleus and for every candidate cell.
        min_r, min_c, max_r, max_c = nucleus_region.bbox
        cell_labels_under_nucleus = labeled_cells[min_r:max_r, min_c:max_c][nucleus_region.image]
        cell_labels_under_nucleus = cell_labels_under_nucleus[cell_labels_under_nucleus > 0]
        if cell_labels_under_nucleus.size == 0:
            continue  # nucleus lies entirely on background

        # overlap_counts[label] = number of nucleus pixels covered by that cell.
        overlap_counts = np.bincount(cell_labels_under_nucleus.astype(np.intp))
        # argmax returns the first maximum, i.e. ties go to the smallest label.
        contained_in_cell_id = int(np.argmax(overlap_counts))
        max_overlap_ratio = overlap_counts[contained_in_cell_id] / nucleus_area

        if max_overlap_ratio > 0.5:
            results['nuclei_data'].append({
                'nucleus_id': nucleus_id, 'cell_id': contained_in_cell_id, 'area': nucleus_area,
                'eccentricity': getattr(nucleus_region, 'eccentricity', 0),
                'centroid_y': nucleus_region.centroid[0], 'centroid_x': nucleus_region.centroid[1],
                'overlap_ratio': max_overlap_ratio
            })
            owner = cell_records.get(contained_in_cell_id)
            if owner is not None:
                owner['nuclei_count'] += 1
                owner['nuclei_ids_in_cell'].append(nucleus_id)
            results['cell_nuclei_mapping'][contained_in_cell_id].append(nucleus_id)
    return results

def extract_features_for_cell(cell_data_item, cell_nuclei_map, nuclei_data_list,
                              avg_normal_nucleus_area=500, avg_normal_cell_area=2000):
    """Extract morphometric features for one cell and the nuclei assigned to it.

    Args:
        cell_data_item: Dict describing the cell. Must contain ``'cell_id'``,
            ``'area'``, ``'perimeter'``, ``'centroid_x'`` and ``'centroid_y'``
            (the centroids are only read when the cell has nuclei);
            ``'eccentricity'``, ``'major_axis_length'`` and
            ``'minor_axis_length'`` are optional.
        cell_nuclei_map: Mapping of cell label -> list of nucleus labels.
        nuclei_data_list: List of nucleus dicts (``'nucleus_id'``, ``'cell_id'``,
            ``'area'``, ``'centroid_x'``, ``'centroid_y'``, optional
            ``'eccentricity'``) for the whole image.
        avg_normal_nucleus_area: Reference nucleus area (in pixels) used for
            ``'nuclear_enlargement_factor'``.
        avg_normal_cell_area: Reference cell area (in pixels) used for
            ``'cell_enlargement_factor'``.

    Returns:
        Dict of feature name -> value. Nucleus-derived features are 0 when the
        cell has no nuclei; an enlargement factor is 1.0 when its numerator or
        its reference area is not positive.
    """
    cell_area = cell_data_item['area']
    cell_perimeter = cell_data_item['perimeter']
    major_axis = cell_data_item.get('major_axis_length')
    minor_axis = cell_data_item.get('minor_axis_length')

    original_cell_label = cell_data_item['cell_id']
    nucleus_original_labels = cell_nuclei_map.get(original_cell_label, [])

    # Key order is kept stable because it becomes the DataFrame column order.
    features = {
        'cell_area': cell_area,
        'cell_perimeter': cell_perimeter,
        'cell_eccentricity': cell_data_item.get('eccentricity', 0),
        # 4*pi*A / P^2: 1 for a perfect circle, smaller for irregular outlines.
        'cell_circularity': (4 * np.pi * cell_area / (cell_perimeter ** 2)) if cell_perimeter > 0 else 0,
        'cell_aspect_ratio': (major_axis / minor_axis) if major_axis and minor_axis and minor_axis > 0 else 1.0,
        'nuclei_count': len(nucleus_original_labels),
        'avg_nucleus_area': 0,
        'total_nuclear_area': 0,
        'max_nucleus_area': 0,
        'avg_nucleus_eccentricity': 0,
        'nucleus_area_std': 0,
        'nucleus_displacement_avg': 0,
        'nucleus_to_cell_area_ratio': 0,
    }

    if features['nuclei_count'] > 0:
        wanted_labels = set(nucleus_original_labels)
        # Nucleus labels are matched together with the owning cell label so a
        # record belonging to another cell is never picked up.
        cell_nuclei_details = [
            nd for nd in nuclei_data_list
            if nd['cell_id'] == original_cell_label and nd['nucleus_id'] in wanted_labels
        ]
        if cell_nuclei_details:
            areas = [n['area'] for n in cell_nuclei_details]
            eccs = [n.get('eccentricity', 0) for n in cell_nuclei_details]
            features['avg_nucleus_area'] = np.mean(areas)
            features['total_nuclear_area'] = sum(areas)
            features['max_nucleus_area'] = max(areas)
            features['avg_nucleus_eccentricity'] = np.mean(eccs)
            features['nucleus_area_std'] = np.std(areas) if len(areas) > 1 else 0
            if cell_area > 0:
                features['nucleus_to_cell_area_ratio'] = features['total_nuclear_area'] / cell_area
            # Euclidean distance between each nucleus centroid and the cell centroid.
            displacements = [
                np.sqrt((n['centroid_x'] - cell_data_item['centroid_x']) ** 2
                        + (n['centroid_y'] - cell_data_item['centroid_y']) ** 2)
                for n in cell_nuclei_details
            ]
            features['nucleus_displacement_avg'] = np.mean(displacements)

    avg_nucleus_area = features['avg_nucleus_area']
    features['nuclear_enlargement_factor'] = (
        avg_nucleus_area / avg_normal_nucleus_area
        if avg_normal_nucleus_area > 0 and avg_nucleus_area > 0 else 1.0
    )
    features['cell_enlargement_factor'] = (
        cell_area / avg_normal_cell_area
        if avg_normal_cell_area > 0 and cell_area > 0 else 1.0
    )

    return features

def calculate_senescence_score(df, score_weights, area_log_features):
    """Add a weighted senescence score and its min-max normalized form to ``df``.

    The scoring features named in ``score_weights`` that exist in ``df`` are
    copied, the ones listed in ``area_log_features`` are log1p-transformed,
    and the numeric ones are standardized. The score is the weighted sum of
    the standardized features; it is then rescaled to [0, 1]. ``df`` is
    modified in place (columns ``senescence_score`` and
    ``senescence_score_normalized``) and also returned.
    """
    print("\nCalculating per-cell senescence score...")

    scoring_columns = [name for name in score_weights if name in df.columns]
    if len(scoring_columns) == 0:
        print("Error: No features for senescence score calculation are present. Score will be NaN.")
        df['senescence_score'] = np.nan
        df['senescence_score_normalized'] = np.nan
        return df

    scoring_frame = df[scoring_columns].copy()

    # Compress the long right tail of the size-like features.
    for name in area_log_features:
        if name not in scoring_frame.columns:
            continue
        scoring_frame[name] = np.log1p(scoring_frame[name])
        print(f"  Log-transformed for scoring: {name}")

    # Bring every numeric feature to zero mean / unit variance.
    numeric_names = scoring_frame.select_dtypes(include=np.number).columns
    if numeric_names.empty:
        print("  Warning: No numeric columns found in scoring subset for standardization.")
        standardized = pd.DataFrame(index=scoring_frame.index)
    else:
        standardized = pd.DataFrame(
            StandardScaler().fit_transform(scoring_frame[numeric_names]),
            columns=numeric_names,
            index=scoring_frame.index,
        )

    # Accumulate the weighted sum in the order the weights are listed.
    df['senescence_score'] = 0.0
    for name, weight in score_weights.items():
        if name in standardized.columns:
            df['senescence_score'] += standardized[name] * weight
            continue
        if name in df.columns:
            print(f"  Warning: Scoring feature '{name}' not in standardized set, using original. Ensure this is intended.")
        else:
            print(f"  Warning: Feature '{name}' for scoring not found in DataFrame. Skipping.")

    lowest = df['senescence_score'].min()
    highest = df['senescence_score'].max()
    has_spread = not (pd.isna(lowest) or pd.isna(highest) or lowest == highest)
    if has_spread:
        df['senescence_score_normalized'] = (df['senescence_score'] - lowest) / (highest - lowest)
    else:
        # No usable range to rescale against: fall back to the midpoint.
        df['senescence_score_normalized'] = 0.5
        print("  Warning: Senescence score is NaN or uniform. Normalized score set to 0.5.")

    print("Senescence score calculation complete.")
    return df

def apply_rule_based_classification(df, rules, default_label):
    """Classify cells by running an ordered list of gating rules.

    Every cell starts with ``default_label``. Rules are applied in order and a
    rule can only relabel cells that still carry the default label, so the
    first matching rule wins. All conditions of a rule must hold; a cell whose
    value for a condition feature is missing or non-numeric never satisfies
    that condition. Writes ``rule_based_classification_granular`` (the label of
    the rule that fired) and ``cell_type`` ('Senescent' if any rule fired,
    otherwise 'Non-senescent') into ``df`` and returns it. If a feature used
    by any rule is absent, no rule is applied and ``cell_type`` is set to
    'Unknown_Rules_Missing_Features'.
    """
    print("\n--- Applying Rule-Based Classification ---")
    granular_col = 'rule_based_classification_granular'
    df[granular_col] = default_label

    # Every feature that any rule refers to must be available.
    all_rule_features = {feat for rule in rules for feat, _, _ in rule['conditions']}
    missing_features_for_rules = [feat for feat in all_rule_features if feat not in df.columns]
    if len(missing_features_for_rules) > 0:
        print(f"  ERROR: Critical features for rule-based gating are missing: {missing_features_for_rules}. Aborting rule application.")
        df['cell_type'] = 'Unknown_Rules_Missing_Features'
        return df

    comparators = {
        '>': lambda series, threshold: series > threshold,
        '<': lambda series, threshold: series < threshold,
        '>=': lambda series, threshold: series >= threshold,
        '<=': lambda series, threshold: series <= threshold,
        '==': lambda series, threshold: series == threshold,
        '!=': lambda series, threshold: series != threshold,
    }

    for rule in rules:
        print(f"  Applying rule: {rule['name']}")

        still_default = df[granular_col] == default_label
        if not still_default.any():
            continue

        rule_hits = pd.Series(np.ones(len(df), dtype=bool), index=df.index)
        for feature, operator, value in rule['conditions']:
            compare = comparators.get(operator)
            if compare is None:
                print(f"    Unknown operator '{operator}' in rule '{rule['name']}'.")
                rule_hits[:] = False
                break
            numeric_values = pd.to_numeric(df[feature], errors='coerce')
            # Missing values must never satisfy a condition (matters for '!=').
            rule_hits = rule_hits & compare(numeric_values, value) & ~numeric_values.isnull()

        if not rule_hits.any():
            continue

        cells_to_label_now = still_default & rule_hits
        df.loc[cells_to_label_now, granular_col] = rule['output_label']
        print(f"    {cells_to_label_now.sum()} cells labeled as '{rule['output_label']}'.")

    untouched = df[granular_col] == default_label
    df['cell_type'] = np.where(untouched, 'Non-senescent', 'Senescent')
    print(f"\nFinal rule-based classification counts (granular):\n{df[granular_col].value_counts(dropna=False)}")
    print(f"\nFinal rule-based classification counts (binary 'cell_type'):\n{df['cell_type'].value_counts(dropna=False)}")
    return df

def generate_umap_coordinates(df, features_for_umap, area_log_features, umap_n_neighbors=15, umap_min_dist=0.1, umap_random_state=42):
    """Compute a 2-D UMAP embedding and store it in ``df`` as ``umap_x`` / ``umap_y``.

    Args:
        df: Per-cell feature table. It is modified in place and also returned.
        features_for_umap: Candidate feature column names; names missing from
            ``df`` are ignored.
        area_log_features: Columns that receive ``log1p`` before scaling (only
            those that are also UMAP features are touched).
        umap_n_neighbors: Requested UMAP neighbourhood size; it is capped at
            ``n_samples - 1``.
        umap_min_dist: UMAP ``min_dist`` parameter.
        umap_random_state: Seed forwarded to UMAP.

    Returns:
        ``df`` with ``umap_x`` and ``umap_y`` columns. Both are NaN when no
        requested feature exists, when there are too few samples, or when NaNs
        remain after mean imputation.
    """
    print("\n--- Generating UMAP Coordinates ---")

    actual_features_for_umap = [f for f in features_for_umap if f in df.columns]
    if not actual_features_for_umap:
        print("Error: No features specified for UMAP are present in the DataFrame.")
        df['umap_x'] = np.nan
        df['umap_y'] = np.nan
        return df

    # Validate the sample count BEFORE scaling: the scaler rejects an empty
    # table, so checking afterwards would make this graceful skip unreachable
    # for empty input.
    n_samples = len(df)
    actual_n_neighbors = min(umap_n_neighbors, n_samples - 1)
    if actual_n_neighbors < 2:
        print(f"Warning: Not enough samples ({n_samples}) for UMAP. Skipping UMAP.")
        df['umap_x'], df['umap_y'] = np.nan, np.nan
        return df

    # Work on a copy so the log transform / imputation never leak into df.
    umap_features_df = df[actual_features_for_umap].copy()

    for col in area_log_features:
        if col in umap_features_df.columns:
            umap_features_df[col] = np.log1p(umap_features_df[col])
            print(f"  Log-transformed for UMAP: {col}")

    if umap_features_df.isnull().values.any():
        print(f"  Found {umap_features_df.isnull().values.sum()} NaN values in UMAP features. Filling with column means.")
        umap_features_df = umap_features_df.fillna(umap_features_df.mean())

    # A column that is entirely NaN has a NaN mean, so NaNs can survive the fill.
    if umap_features_df.isnull().values.any():
        print("  Error: NaNs still present after attempting to fill. UMAP may fail.")
        df['umap_x'] = np.nan
        df['umap_y'] = np.nan
        return df

    scaler = StandardScaler()
    scaled_umap_features = scaler.fit_transform(umap_features_df)

    reducer = umap.UMAP(n_neighbors=actual_n_neighbors, min_dist=umap_min_dist, random_state=umap_random_state, n_components=2)
    embedding = reducer.fit_transform(scaled_umap_features)
    df['umap_x'] = embedding[:, 0]
    df['umap_y'] = embedding[:, 1]
    print("UMAP coordinates generated.")
    return df

def visualize_results(df_to_visualize, output_dir_main):
    """Write the binary and granular UMAP scatter plots as PNG files.

    Args:
        df_to_visualize: Table holding ``umap_x`` / ``umap_y`` and, optionally,
            ``cell_type`` and ``rule_based_classification_granular``.
        output_dir_main: Directory that receives ``umap_rule_based_binary.png``
            and ``umap_rule_based_granular.png``.

    Returns:
        None. Nothing is plotted when the table is empty or carries no usable
        UMAP coordinates.
    """
    print("\n--- Creating Visualizations (Rule-Based) ---")
    columns = df_to_visualize.columns
    umap_unavailable = (
        df_to_visualize.empty
        or 'umap_x' not in columns
        or df_to_visualize['umap_x'].isnull().all()
    )
    if umap_unavailable:
        print("Skipping UMAP visualizations as UMAP data is missing or empty.")
        return

    def _decorate_axes():
        # Axis labelling shared by both figures.
        plt.xlabel('UMAP Dimension 1', fontsize=12)
        plt.ylabel('UMAP Dimension 2', fontsize=12)
        plt.grid(True, linestyle='--', alpha=0.7)

    # ---- Figure 1: binary senescent / non-senescent colouring ----
    has_cell_type = 'cell_type' in columns
    plt.figure(figsize=(10, 8))
    if has_cell_type and df_to_visualize['cell_type'].notna().any():
        binary_palette = {
            'Senescent': 'red',
            'Non-senescent': 'blue',
            'Unknown_Rules_Missing_Features': 'grey',
        }
        sns.scatterplot(
            data=df_to_visualize,
            x='umap_x', y='umap_y',
            hue='cell_type',
            palette=binary_palette,
            s=30, alpha=0.7, edgecolor='k', linewidth=0.5,
        )
        plt.title('UMAP by Rule-Based Classification (Senescent/Non-senescent)', fontsize=14)
    else:
        # No usable labels: fall back to an uncoloured projection.
        plt.scatter(df_to_visualize['umap_x'], df_to_visualize['umap_y'], s=30, alpha=0.5)
        plt.title('UMAP Projection (Coloring unavailable)', fontsize=14)

    _decorate_axes()
    if has_cell_type:
        plt.legend(title='Cell Type (Rule-Based)')
    binary_png = os.path.join(output_dir_main, 'umap_rule_based_binary.png')
    plt.savefig(binary_png, dpi=300, bbox_inches='tight')
    plt.close()
    print("UMAP visualization (binary rule-based) saved.")

    # ---- Figure 2: one colour per rule that fired ----
    granular_col = 'rule_based_classification_granular'
    if granular_col not in columns or not df_to_visualize[granular_col].notna().any():
        return

    plt.figure(figsize=(12, 10))
    ordered_labels = sorted(df_to_visualize[granular_col].unique())
    senescent_labels = [name for name in ordered_labels if name.startswith("Rule_Sen_")]
    red_shades = sns.color_palette("Reds_r", n_colors=len(senescent_labels))

    # Default label -> blue, senescent rules -> successive red shades,
    # anything else -> grey.
    colour_by_label = {}
    shade_cursor = 0
    for name in ordered_labels:
        if name == RULE_BASED_DEFAULT_LABEL:
            colour_by_label[name] = 'blue'
        elif name.startswith("Rule_Sen_"):
            colour_by_label[name] = red_shades[shade_cursor % len(red_shades)]
            shade_cursor += 1
        else:
            colour_by_label[name] = 'grey'

    sns.scatterplot(
        data=df_to_visualize,
        x='umap_x', y='umap_y',
        hue=granular_col,
        hue_order=ordered_labels,
        palette=colour_by_label,
        s=30, alpha=0.7, edgecolor='k', linewidth=0.5,
    )
    plt.title('UMAP by Granular Rule-Based Classification', fontsize=14)
    _decorate_axes()
    # Legend sits outside the axes; tight_layout reserves room for it.
    plt.legend(title='Rule Applied', bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    granular_png = os.path.join(output_dir_main, 'umap_rule_based_granular.png')
    plt.savefig(granular_png, dpi=300)
    plt.close()
    print("UMAP visualization (granular rule-based) saved.")

def save_results(processed_df, output_directory):
    """Persist the per-cell table plus a per-sample summary CSV and bar chart.

    Args:
        processed_df: Classified per-cell table. The summary and plot are only
            produced when it has both ``sample_id`` and ``cell_type`` columns.
        output_directory: Destination folder; created when it does not exist.

    Returns:
        None.
    """
    if processed_df.empty:
        print("No data to save.")
        return

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    full_csv_path = os.path.join(output_directory, 'cell_classification_rule_based_full.csv')
    processed_df.to_csv(full_csv_path, index=False)
    print(f"Full rule-based results saved to: {full_csv_path}")

    available = processed_df.columns
    if not ('sample_id' in available and 'cell_type' in available):
        return

    # Rows: sample_id, columns: cell_type label, values: cell counts.
    per_sample = processed_df.groupby('sample_id')['cell_type'].value_counts(normalize=False)
    summary = per_sample.unstack(fill_value=0)
    # Make sure every expected label has a column even if it never occurred.
    for expected_label in ['Senescent', 'Non-senescent', 'Unknown_Rules_Missing_Features']:
        if expected_label not in summary.columns:
            summary[expected_label] = 0

    summary['total_cells'] = summary.sum(axis=1)
    any_cells = (summary['total_cells'] > 0).any()
    if any_cells:
        summary['percent_senescent'] = summary['Senescent'] / summary['total_cells'] * 100
    else:
        summary['percent_senescent'] = 0

    summary_csv_path = os.path.join(output_directory, 'sample_summary_rule_based.csv')
    summary.to_csv(summary_csv_path)
    print(f"Rule-based sample summary saved to: {summary_csv_path}")

    # 'percent_senescent' was just assigned above, so the plot is always drawn.
    plt.figure(figsize=(12, 6))
    sns.barplot(x=summary.index, y='percent_senescent', data=summary.reset_index(), color='red')
    plt.title('Senescent Cells by Sample (Rule-Based)')
    plt.xlabel('Sample ID')
    plt.ylabel('Senescent (%)')
    plt.xticks(rotation=90, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(output_directory, 'senescent_percentage_rule_based.png'), dpi=300)
    plt.close()
    print("Rule-based senescent percentage plot saved.")

def main(base_dir, cell_subdir, nuclei_subdir, output_parent_dir):
    """Run the full rule-based senescence workflow for one dataset folder.

    Steps: pair cell/nuclei mask files, extract per-cell features for every
    pair, compute the senescence score and UMAP coordinates, apply the
    rule-based gates, then save CSVs and plots.

    Args:
        base_dir: Dataset folder that contains the two mask sub-folders.
        cell_subdir: Name of the cell-mask sub-folder inside ``base_dir``.
        nuclei_subdir: Name of the nuclei-mask sub-folder inside ``base_dir``.
        output_parent_dir: Results are written to
            ``<output_parent_dir>/<dataset name>/Senescence_Results``.

    Returns:
        None. The function returns early when no file pairs are found or no
        features could be extracted.
    """
    import traceback  # only needed for per-sample error reports

    print(f"=== Senescence Analysis (Rule-Based) for {base_dir} ===")
    cell_mask_dir = os.path.join(base_dir, cell_subdir)
    nuclei_dir = os.path.join(base_dir, nuclei_subdir)
    # normpath drops a trailing separator; without it basename("/x/Static-x20/")
    # is '' and the dataset folder would vanish from the output path.
    dataset_name = os.path.basename(os.path.normpath(base_dir))
    output_dir_main = os.path.join(output_parent_dir, dataset_name, "Senescence_Results")
    if not os.path.exists(output_dir_main):
        # exist_ok avoids a crash if the folder appears between check and create.
        os.makedirs(output_dir_main, exist_ok=True)
        print(f"Created: {output_dir_main}")

    file_pairs = find_mask_files(cell_mask_dir, nuclei_dir)
    if not file_pairs:
        print("No matching file pairs. Exiting.")
        return

    all_cell_features_list = []
    for file_pair in tqdm(file_pairs, desc="Processing image pairs"):
        sample_id = file_pair['sample_id']
        print(f"\nProcessing sample: {sample_id}")
        try:
            cell_mask = load_mask_image(file_pair['cell_file'])
            nuclei_mask = load_mask_image(file_pair['nuclei_file'])
            if cell_mask is None or nuclei_mask is None:
                print(f"Skipping {sample_id}.")
                continue

            tracking_results = accurately_track_nuclei_in_cells(cell_mask, nuclei_mask)
            for sc_data in tracking_results['cell_data']:
                features = extract_features_for_cell(sc_data, tracking_results['cell_nuclei_mapping'], tracking_results['nuclei_data'])
                # Cell labels repeat across images, so prefix them with the sample.
                features['cell_id_unique'] = f"{sample_id}_{sc_data['cell_id']}"
                features['sample_id'] = sample_id
                features['original_cell_label'] = sc_data['cell_id']
                all_cell_features_list.append(features)
        except Exception as e:
            # Best effort: one broken sample must not abort the whole batch.
            print(f"Error processing {sample_id}: {e}")
            traceback.print_exc()

    if not all_cell_features_list:
        print("No cell features extracted.")
        return
    all_features_df = pd.DataFrame(all_cell_features_list)
    print(f"\nTotal cells extracted: {len(all_features_df)}")

    # The score must exist before gating: one of the rules reads
    # 'senescence_score_normalized'.
    all_features_df = calculate_senescence_score(all_features_df, SENESCENCE_SCORE_WEIGHTS, AREA_FEATURES_TO_LOG)
    all_features_df = generate_umap_coordinates(all_features_df, FEATURES_FOR_UMAP, AREA_FEATURES_TO_LOG)
    classified_df = apply_rule_based_classification(all_features_df, RULE_BASED_GATES, RULE_BASED_DEFAULT_LABEL)

    save_results(classified_df, output_dir_main)
    visualize_results(classified_df, output_dir_main)

    print("\nRule-Based Analysis complete!")

if __name__ == "__main__":
    # --- Input location: <parent>/<dataset>/<mask sub-folders> ---
    segmented_data_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented"
    current_dataset_folder_name = "Static-x20"
    cell_masks_subdir = "Cell_merged_conservative"
    nuclei_masks_subdir = "Nuclei"

    # --- Output location: results land under <parent>/<dataset>/... ---
    analysis_output_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis"

    base_input_dir = os.path.join(segmented_data_parent_dir, current_dataset_folder_name)

    # Only start the analysis when the dataset folder actually exists.
    if os.path.isdir(base_input_dir):
        main(base_input_dir, cell_masks_subdir, nuclei_masks_subdir, analysis_output_parent_dir)
    else:
        print(f"Error: Base input directory not found: {base_input_dir}")


=== Senescence Analysis (Rule-Based) for /content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-x20 ===

--- Finding and Pairing Mask Files ---
Found 23 cell mask files and 23 nuclei mask files
Total matching cell-nuclei file pairs found: 23


Processing image pairs:   0%|          | 0/23 [00:00<?, ?it/s]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001
Found 304 cell objects, 318 nuclei objects.


Processing image pairs:   4%|▍         | 1/23 [00:02<00:49,  2.27s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002
Found 234 cell objects, 252 nuclei objects.


Processing image pairs:   9%|▊         | 2/23 [00:04<00:44,  2.14s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003
Found 310 cell objects, 332 nuclei objects.


Processing image pairs:  13%|█▎        | 3/23 [00:06<00:42,  2.12s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004
Found 303 cell objects, 316 nuclei objects.


Processing image pairs:  17%|█▋        | 4/23 [00:10<00:55,  2.93s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005
Found 236 cell objects, 253 nuclei objects.


Processing image pairs:  22%|██▏       | 5/23 [00:11<00:42,  2.35s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006
Found 262 cell objects, 271 nuclei objects.


Processing image pairs:  26%|██▌       | 6/23 [00:13<00:33,  1.95s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007
Found 214 cell objects, 233 nuclei objects.


Processing image pairs:  30%|███       | 7/23 [00:14<00:26,  1.66s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008
Found 234 cell objects, 254 nuclei objects.


Processing image pairs:  35%|███▍      | 8/23 [00:15<00:22,  1.48s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009
Found 283 cell objects, 296 nuclei objects.


Processing image pairs:  39%|███▉      | 9/23 [00:16<00:19,  1.42s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010
Found 210 cell objects, 225 nuclei objects.


Processing image pairs:  43%|████▎     | 10/23 [00:17<00:16,  1.30s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011
Found 261 cell objects, 278 nuclei objects.


Processing image pairs:  48%|████▊     | 11/23 [00:18<00:15,  1.28s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001
Found 217 cell objects, 241 nuclei objects.


Processing image pairs:  52%|█████▏    | 12/23 [00:19<00:13,  1.20s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002
Found 287 cell objects, 315 nuclei objects.


Processing image pairs:  57%|█████▋    | 13/23 [00:21<00:12,  1.26s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003
Found 284 cell objects, 313 nuclei objects.


Processing image pairs:  61%|██████    | 14/23 [00:23<00:12,  1.44s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004
Found 222 cell objects, 280 nuclei objects.


Processing image pairs:  65%|██████▌   | 15/23 [00:24<00:12,  1.53s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005
Found 196 cell objects, 238 nuclei objects.


Processing image pairs:  70%|██████▉   | 16/23 [00:26<00:10,  1.44s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006
Found 273 cell objects, 332 nuclei objects.


Processing image pairs:  74%|███████▍  | 17/23 [00:27<00:08,  1.44s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007
Found 223 cell objects, 270 nuclei objects.


Processing image pairs:  78%|███████▊  | 18/23 [00:28<00:06,  1.33s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008
Found 268 cell objects, 306 nuclei objects.


Processing image pairs:  83%|████████▎ | 19/23 [00:29<00:05,  1.33s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009
Found 304 cell objects, 318 nuclei objects.


Processing image pairs:  87%|████████▋ | 20/23 [00:31<00:04,  1.35s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010
Found 183 cell objects, 190 nuclei objects.


Processing image pairs:  91%|█████████▏| 21/23 [00:32<00:02,  1.21s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011
Found 216 cell objects, 230 nuclei objects.


Processing image pairs:  96%|█████████▌| 22/23 [00:33<00:01,  1.16s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012
Found 215 cell objects, 232 nuclei objects.


Processing image pairs: 100%|██████████| 23/23 [00:34<00:00,  1.49s/it]
  warn(



Total cells extracted: 5739

Calculating per-cell senescence score...
  Log-transformed for scoring: cell_area
  Log-transformed for scoring: avg_nucleus_area
  Log-transformed for scoring: cell_perimeter
Senescence score calculation complete.

--- Generating UMAP Coordinates ---
  Log-transformed for UMAP: cell_area
  Log-transformed for UMAP: avg_nucleus_area
  Log-transformed for UMAP: max_nucleus_area
  Log-transformed for UMAP: cell_perimeter
UMAP coordinates generated.

--- Applying Rule-Based Classification ---
  Applying rule: Polynucleated
    386 cells labeled as 'Rule_Sen_Poly'.
  Applying rule: Very_Large_Cell
    1322 cells labeled as 'Rule_Sen_VeryLarge'.
  Applying rule: Low_Circularity
    21 cells labeled as 'Rule_Sen_LowCirc'.
  Applying rule: Low_NucToCellRatio
    46 cells labeled as 'Rule_Sen_LowNucRatio'.
  Applying rule: High_Score_Not_Otherwise_Caught
    0 cells labeled as 'Rule_Sen_HighScore'.

Final rule-based classification counts (granular):
rule_based_cla

In [6]:
import os
import numpy as np
import pandas as pd
import cv2
import re
from scipy import ndimage
from skimage import io, measure, segmentation # Added segmentation
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches # Added for legends in mask visualization
import seaborn as sns
from sklearn.preprocessing import StandardScaler
# KMeans is no longer used for final classification but can be kept for exploratory UMAP clustering
from sklearn.cluster import KMeans
import umap
from tqdm import tqdm

# --- Configuration & Parameters ---

# Columns fed into the UMAP embedding.
FEATURES_FOR_UMAP = [
    'cell_area',
    'cell_perimeter',
    'cell_eccentricity',
    'cell_circularity',
    'cell_aspect_ratio',
    'avg_nucleus_area',
    'max_nucleus_area',
    'avg_nucleus_eccentricity',
    'nucleus_area_std',
    'nucleus_displacement_avg',
    'nucleus_to_cell_area_ratio',
    'nuclear_enlargement_factor',
    'cell_enlargement_factor',
]

# Columns that receive log1p before UMAP and before the senescence score.
AREA_FEATURES_TO_LOG = [
    'cell_area',
    'avg_nucleus_area',
    'total_nuclear_area',
    'max_nucleus_area',
    'cell_perimeter',
]

# Per-feature weights of the senescence score (applied to standardized values;
# a negative weight lowers the score as the feature grows).
SENESCENCE_SCORE_WEIGHTS = {
    'cell_area': 1.5,
    'cell_perimeter': 0.5,
    'cell_eccentricity': 0.5,
    'cell_circularity': -1.0,
    'cell_aspect_ratio': 0.5,
    'avg_nucleus_area': 1.0,
    'avg_nucleus_eccentricity': 0.3,
    'nucleus_to_cell_area_ratio': -1.0,
    'nuclear_enlargement_factor': 1.0,
    'cell_enlargement_factor': 1.5,
    'nucleus_displacement_avg': 0.2,
}

# User-defined gates. Each rule lists (feature, operator, threshold) conditions
# and the label assigned to cells that satisfy all of them. Rules are applied
# in list order and only to cells that still carry the default label, so the
# first matching rule wins.
RULE_BASED_GATES = [
    {
        'name': 'Polynucleated',
        'conditions': [('nuclei_count', '>', 1)],
        'output_label': 'Rule_Sen_Poly',
    },
    {
        'name': 'Very_Large_Cell',
        'conditions': [('cell_area', '>', 5000)],
        'output_label': 'Rule_Sen_VeryLarge',
    },
    {
        'name': 'Low_Circularity',
        'conditions': [('cell_circularity', '<', 0.2)],
        'output_label': 'Rule_Sen_LowCirc',
    },
    {
        'name': 'Low_NucToCellRatio',
        'conditions': [('nucleus_to_cell_area_ratio', '<', 0.1)],
        'output_label': 'Rule_Sen_LowNucRatio',
    },
    {
        'name': 'High_Score_Not_Otherwise_Caught',
        'conditions': [('senescence_score_normalized', '>', 0.75)],
        'output_label': 'Rule_Sen_HighScore',
    },
]

# Label kept by cells that no rule matched.
RULE_BASED_DEFAULT_LABEL = 'Rule_NonSenescent'

# Label of polynucleated senescent cells; must equal one of the
# 'output_label' values in RULE_BASED_GATES.
POLYNUCLEATED_SENESCENT_RULE_LABEL = 'Rule_Sen_Poly'

# --- Color Definitions for Mask Visualization (three-component lists) ---
COLOR_NON_SENESCENT = [0, 0, 255]              # Blue
COLOR_SENESCENT_POLYNUCLEATED = [255, 165, 0]  # Orange
COLOR_SENESCENT_OTHER_RULES = [255, 0, 0]      # Red
COLOR_CELL_BOUNDARY = [255, 255, 255]          # White
COLOR_NUCLEI_OUTLINE = [255, 255, 0]           # Yellow
COLOR_UNKNOWN = [128, 128, 128]                # Grey

# Name of the sub-folder that receives the mask overlay images.
MASK_VISUALIZATION_SUBDIR = "mask_overlays_enhanced"


# Function to extract sample ID
def extract_sample_id(filename):
    """Derive the sample ID shared by a cell mask and its nuclei mask.

    The extension and an optional leading ``denoised_`` are removed. The ID is
    then the shortest prefix ending in ``_seq<digits>``. If that pattern is
    absent, the prefix up to the first non-leading ``seq...`` token is used;
    failing that, the first seven underscore-separated tokens (with a warning).

    Args:
        filename: Mask file name (no directory handling is performed).

    Returns:
        The extracted sample ID string.
    """
    stem, _extension = os.path.splitext(filename)

    denoise_prefix = 'denoised_'
    if stem.startswith(denoise_prefix):
        stem = stem[len(denoise_prefix):]

    # Preferred form: everything up to and including "_seq<digits>".
    seq_match = re.search(r'(.+?_seq\d+)', stem)
    if seq_match is not None:
        return seq_match.group(1)

    # Fallback 1: a token that merely starts with "seq" (and is not the first).
    tokens = stem.split('_')
    for position in range(1, len(tokens)):
        if tokens[position].startswith('seq'):
            return '_'.join(tokens[:position + 1])

    # Fallback 2: use the leading tokens and tell the user about it.
    print(f"Warning: Fallback ID extraction for '{filename}'.")
    return '_'.join(tokens[:7])

def find_mask_files(cell_dir, nuclei_dir):
    """Pair cell-mask and nuclei-mask TIFF files that share a sample ID.

    Args:
        cell_dir: Directory holding the cell mask images.
        nuclei_dir: Directory holding the nuclei mask images.

    Returns:
        A list of dicts with keys ``cell_file``, ``nuclei_file`` (both full
        paths) and ``sample_id``, ordered by cell file name. Cell files without
        a matching nuclei file are reported and left out.
    """
    print("\n--- Finding and Pairing Mask Files ---")

    def _list_tiffs(directory):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # processing order (and duplicate-ID resolution) reproducible.
        return sorted(
            f for f in os.listdir(directory)
            if f.endswith(('.tif', '.tiff')) and not f.startswith('.')
        )

    cell_files = _list_tiffs(cell_dir)
    nuclei_files = _list_tiffs(nuclei_dir)
    print(f"Found {len(cell_files)} cell files, {len(nuclei_files)} nuclei files")

    # Extract each ID exactly once so a fallback warning is not printed twice.
    nuclei_lookup = {}
    for nf in nuclei_files:
        nuclei_id = extract_sample_id(nf)
        if nuclei_id:
            nuclei_lookup[nuclei_id] = nf

    file_pairs = []
    for cf in cell_files:
        sid = extract_sample_id(cf)
        if not sid:
            print(f"Warning: Could not extract ID for cell file: {cf}")
        elif sid in nuclei_lookup:
            file_pairs.append({'cell_file': os.path.join(cell_dir, cf),
                               'nuclei_file': os.path.join(nuclei_dir, nuclei_lookup[sid]),
                               'sample_id': sid})
        else:
            print(f"Warning: No matching nuclei file for cell ID: {sid} (from {cf})")

    print(f"Total matching pairs: {len(file_pairs)}")
    if not file_pairs and (cell_files or nuclei_files):
        print("CRITICAL: No pairs matched.")
    return file_pairs

def load_mask_image(filepath):
    """Load a mask image and return it as a uint16 label image.

    * Multi-channel images are reduced to their first channel.
    * Integer images whose maximum exceeds 1 are taken as already labelled and
      only cast to uint16.
    * Boolean images, integer images with maximum <= 1 and float images are
      binarised (``> 0``, or ``> 0.5`` for floats) and labelled with
      ``ndimage.label``.

    Args:
        filepath: Path of the image file.

    Returns:
        A uint16 label array, or ``None`` when the file cannot be read or
        processed (the error is printed).
    """
    try:
        img = io.imread(filepath)
        if img.ndim > 2:
            img = img[..., 0]

        kind = img.dtype.kind  # 'b' bool, 'i'/'u' integer, 'f' float
        if kind in 'iu' and np.max(img) > 1:
            # NOTE(review): labels above 65535 would wrap in this cast.
            return img.astype(np.uint16)

        if kind in 'biuf':
            # Binary mask in any bool/integer/float dtype. Integer masks other
            # than uint8 previously hit the median fallback below, which
            # returns an empty mask once more than half the pixels are
            # foreground (median == 1, so nothing is > median).
            threshold = 0.5 if kind == 'f' else 0
            binary_img = (img > threshold).astype(np.uint8)
            labeled_img, _ = ndimage.label(binary_img)
            return labeled_img.astype(np.uint16)

        print(f"Warning: Unhandled image type {img.dtype} for {filepath}. Attempting basic labeling.")
        labeled_img, _ = ndimage.label((img > np.median(img)).astype(np.uint8))
        return labeled_img.astype(np.uint16)
    except Exception as e:
        # Deliberate best effort: the caller skips samples that return None.
        print(f"Error loading image {filepath}: {e}")
        return None

def accurately_track_nuclei_in_cells(cell_mask, nuclei_mask):
    """Assign every nucleus to the cell that contains most of its pixels.

    A nucleus is attached to the cell label covering the largest share of its
    area, provided that share exceeds 0.5; otherwise it is dropped.

    Args:
        cell_mask: Label image of cells (0 is background).
        nuclei_mask: Label image of nuclei with the same shape as ``cell_mask``.

    Returns:
        dict with
        ``cell_data``: one dict per cell (shape measurements, ``nuclei_count``,
        ``nuclei_ids_in_cell``);
        ``nuclei_data``: one dict per assigned nucleus (including ``cell_id``
        and ``overlap_ratio``);
        ``cell_nuclei_mapping``: cell label -> list of nucleus labels.
        All three are empty when either mask has no labelled object.
    """
    labeled_cells, num_cells = cell_mask, np.max(cell_mask)
    labeled_nuclei, num_nuclei = nuclei_mask, np.max(nuclei_mask)
    print(f"Found {num_cells} cells, {num_nuclei} nuclei.")

    results = {'cell_data': [], 'nuclei_data': [], 'cell_nuclei_mapping': {}}
    if num_cells == 0 or num_nuclei == 0:
        return results

    # label -> cell record, so a nucleus can update its cell without scanning
    # the whole cell list.
    cell_by_label = {}
    for cp in measure.regionprops(labeled_cells):
        record = {
            'cell_id': cp.label, 'area': cp.area, 'perimeter': cp.perimeter,
            'eccentricity': getattr(cp, 'eccentricity', 0), 'orientation': np.degrees(getattr(cp, 'orientation', 0)),
            'major_axis_length': getattr(cp, 'major_axis_length', 0), 'minor_axis_length': getattr(cp, 'minor_axis_length', 0),
            'centroid_y': cp.centroid[0], 'centroid_x': cp.centroid[1], 'nuclei_count': 0, 'nuclei_ids_in_cell': []
        }
        results['cell_data'].append(record)
        results['cell_nuclei_mapping'][cp.label] = []
        cell_by_label[cp.label] = record

    for nuc_region in measure.regionprops(labeled_nuclei):
        nuc_id, nuc_area = nuc_region.label, nuc_region.area
        if not nuc_area > 0:
            continue

        # Every pixel of the nucleus lies inside its bounding box, so counting
        # overlaps on the crop gives the same numbers as on the full image.
        min_r, min_c, max_r, max_c = nuc_region.bbox
        cells_crop = labeled_cells[min_r:max_r, min_c:max_c]
        nuc_crop = labeled_nuclei[min_r:max_r, min_c:max_c] == nuc_id

        # Cell labels under the nucleus and how many nucleus pixels each covers.
        overlapped_labels, pixel_counts = np.unique(cells_crop[nuc_crop], return_counts=True)
        is_cell = overlapped_labels != 0
        overlapped_labels, pixel_counts = overlapped_labels[is_cell], pixel_counts[is_cell]
        if overlapped_labels.size == 0:
            continue

        # argmax returns the first maximum; labels are sorted ascending, so a
        # tie goes to the lowest label.
        winner = int(np.argmax(pixel_counts))
        best_cell_id = overlapped_labels[winner]
        max_overlap = pixel_counts[winner] / nuc_area
        if not max_overlap > 0.5:
            continue

        results['nuclei_data'].append({
            'nucleus_id': nuc_id, 'cell_id': best_cell_id, 'area': nuc_area,
            'eccentricity': getattr(nuc_region, 'eccentricity', 0),
            'centroid_y': nuc_region.centroid[0], 'centroid_x': nuc_region.centroid[1], 'overlap_ratio': max_overlap
        })
        owner = cell_by_label.get(best_cell_id)
        if owner is not None:
            owner['nuclei_count'] += 1
            owner['nuclei_ids_in_cell'].append(nuc_id)
        results['cell_nuclei_mapping'][best_cell_id].append(nuc_id)
    return results

def extract_features_for_cell(cd_item, cn_map, nuc_data_list):
    """Build the feature dictionary of a single cell.

    Args:
        cd_item: Cell record (``cell_id``, ``area``, ``perimeter``, centroid
            and optionally ``eccentricity`` and axis lengths).
        cn_map: Mapping of cell label -> list of nucleus labels in that cell.
        nuc_data_list: Records of all assigned nuclei (``nucleus_id``,
            ``cell_id``, ``area``, centroid, optional ``eccentricity``).

    Returns:
        dict with the cell shape features, ``nuclei_count``, the nucleus
        statistics (all 0 when no nucleus record matches) and the two
        enlargement factors.
    """
    area = cd_item['area']
    perimeter = cd_item['perimeter']

    feats = {
        'cell_area': area,
        'cell_perimeter': perimeter,
        'cell_eccentricity': cd_item.get('eccentricity', 0),
    }

    # Circularity 4*pi*A / P^2; defined as 0 for a degenerate perimeter.
    if perimeter > 0:
        feats['cell_circularity'] = 4 * np.pi * area / (perimeter ** 2)
    else:
        feats['cell_circularity'] = 0

    major_axis = cd_item.get('major_axis_length')
    minor_axis = cd_item.get('minor_axis_length')
    if major_axis and minor_axis and minor_axis > 0:
        feats['cell_aspect_ratio'] = major_axis / minor_axis
    else:
        feats['cell_aspect_ratio'] = 1.0

    cell_label = cd_item['cell_id']
    assigned_nuclei = cn_map.get(cell_label, [])
    feats['nuclei_count'] = len(assigned_nuclei)

    # Zero defaults first, so the key order is the same with or without nuclei.
    feats.update(dict.fromkeys(
        ('avg_nucleus_area', 'total_nuclear_area', 'max_nucleus_area', 'avg_nucleus_eccentricity',
         'nucleus_area_std', 'nucleus_displacement_avg', 'nucleus_to_cell_area_ratio'),
        0,
    ))

    members = []
    if feats['nuclei_count'] > 0:
        members = [
            nd for nd in nuc_data_list
            if nd['nucleus_id'] in assigned_nuclei and nd['cell_id'] == cell_label
        ]

    if members:
        areas = [nuc['area'] for nuc in members]
        eccentricities = [nuc.get('eccentricity', 0) for nuc in members]
        total_area = sum(areas)

        feats['avg_nucleus_area'] = np.mean(areas)
        feats['total_nuclear_area'] = total_area
        feats['max_nucleus_area'] = max(areas)
        feats['avg_nucleus_eccentricity'] = np.mean(eccentricities)
        # A spread needs at least two nuclei.
        feats['nucleus_area_std'] = np.std(areas) if len(areas) > 1 else 0
        feats['nucleus_to_cell_area_ratio'] = (total_area / area) if area > 0 else 0

        # Mean distance between each nucleus centroid and the cell centroid.
        offsets = []
        for nuc in members:
            dx = nuc['centroid_x'] - cd_item['centroid_x']
            dy = nuc['centroid_y'] - cd_item['centroid_y']
            offsets.append(np.sqrt(dx ** 2 + dy ** 2))
        feats['nucleus_displacement_avg'] = np.mean(offsets)

    # Reference areas used to express size as a ratio; a missing area maps to
    # the neutral factor 1.0.
    reference_nucleus_area, reference_cell_area = 500, 2000
    mean_nucleus_area = feats.get('avg_nucleus_area', 0)
    if mean_nucleus_area > 0:
        feats['nuclear_enlargement_factor'] = mean_nucleus_area / reference_nucleus_area
    else:
        feats['nuclear_enlargement_factor'] = 1.0
    if area > 0:
        feats['cell_enlargement_factor'] = area / reference_cell_area
    else:
        feats['cell_enlargement_factor'] = 1.0
    return feats

def calculate_senescence_score(df, weights, log_features):
    """Add a weighted senescence score and its min-max normalised form to ``df``.

    The weighted features are log1p-transformed where requested, standardized
    with ``StandardScaler`` and summed with their weights.

    Args:
        df: Per-cell feature table; modified in place and returned.
        weights: Mapping of feature name -> weight. Features missing from
            ``df`` are ignored.
        log_features: Feature names that get ``log1p`` before standardization.

    Returns:
        ``df`` with ``senescence_score`` and ``senescence_score_normalized``.
        Both are NaN when no weighted feature is present; the normalised
        score is 0.5 when all scores are equal or undefined.
    """
    print("\nCalculating senescence score...")

    weighted_columns = [name for name in weights if name in df.columns]
    if not weighted_columns:
        df['senescence_score'] = np.nan
        df['senescence_score_normalized'] = np.nan
        return df

    # Transform a copy so the original feature values stay untouched.
    working = df[weighted_columns].copy()
    for name in log_features:
        if name in working.columns:
            working[name] = np.log1p(working[name])

    numeric_columns = working.select_dtypes(include=np.number).columns
    if numeric_columns.empty:
        standardized = pd.DataFrame(index=working.index)
    else:
        standardized = pd.DataFrame(
            StandardScaler().fit_transform(working[numeric_columns]),
            columns=numeric_columns,
            index=working.index,
        )

    # Weighted sum over the standardized columns (stays the scalar 0 when no
    # column contributes).
    raw_score = 0
    for name, weight in weights.items():
        if name in standardized.columns:
            raw_score = raw_score + standardized[name] * weight
    df['senescence_score'] = raw_score

    lowest = df['senescence_score'].min()
    highest = df['senescence_score'].max()
    if pd.isna(lowest) or lowest == highest:
        # No spread to normalise against: use the neutral midpoint.
        df['senescence_score_normalized'] = 0.5
    else:
        df['senescence_score_normalized'] = (df['senescence_score'] - lowest) / (highest - lowest)

    print("Senescence score calculation complete.")
    return df

def apply_rule_based_classification(df, rules, default_label):
    """Label cells with the first rule whose conditions they all satisfy.

    Rules are evaluated in list order. A rule can only relabel cells that still
    carry ``default_label``, so earlier rules take precedence. A condition on a
    missing or non-numeric value never matches.

    Args:
        df: Per-cell feature table; modified in place and returned.
        rules: List of dicts with ``name``, ``output_label`` and
            ``conditions`` -- a list of ``(feature, operator, value)`` tuples
            where operator is one of ``> < >= <= == !=``.
        default_label: Label of cells that no rule matched.

    Returns:
        ``df`` with ``rule_based_classification_granular`` (rule label or
        ``default_label``) and ``cell_type`` ('Senescent' / 'Non-senescent',
        or 'Unknown_Rules_Missing' when a rule feature is absent from ``df``).
    """
    print("\n--- Applying Rule-Based Classification ---")
    df['rule_based_classification_granular'] = default_label

    all_rule_feats = {cond[0] for r in rules for cond in r['conditions']}
    # Sorted so the message is reproducible (set order is arbitrary).
    missing_feats = sorted(f for f in all_rule_feats if f not in df.columns)
    if missing_feats:
        print(f"ERROR: Missing rule features: {missing_feats}. Aborting.")
        df['cell_type'] = 'Unknown_Rules_Missing'
        return df

    # One table for all operators. The previous if/elif chain referenced an
    # undefined name in the '>=', '<=', '==' and '!=' branches (NameError).
    comparators = {
        '>': lambda series, threshold: series > threshold,
        '<': lambda series, threshold: series < threshold,
        '>=': lambda series, threshold: series >= threshold,
        '<=': lambda series, threshold: series <= threshold,
        '==': lambda series, threshold: series == threshold,
        '!=': lambda series, threshold: series != threshold,
    }

    for rule in rules:
        print(f"  Applying rule: {rule['name']}")
        eligible = (df['rule_based_classification_granular'] == default_label)
        if not eligible.any():
            continue

        current_mask = pd.Series(True, index=df.index, dtype=bool)
        for feat, op, val in rule['conditions']:
            compare = comparators.get(op)
            if compare is None:
                # An unknown operator disables the whole rule.
                print(f"Unknown op: {op}")
                current_mask[:] = False
                break
            feat_series = pd.to_numeric(df[feat], errors='coerce')
            # NaN != val evaluates to True, so missing values are masked out
            # explicitly.
            current_mask &= compare(feat_series, val) & feat_series.notna()

        if current_mask.any():
            to_label = eligible & current_mask
            df.loc[to_label, 'rule_based_classification_granular'] = rule['output_label']
            print(f"    {to_label.sum()} cells labeled as '{rule['output_label']}'.")

    df['cell_type'] = np.where(df['rule_based_classification_granular'] == default_label, 'Non-senescent', 'Senescent')
    print(f"\nGranular counts:\n{df['rule_based_classification_granular'].value_counts(dropna=False)}")
    print(f"\nBinary 'cell_type' counts:\n{df['cell_type'].value_counts(dropna=False)}")
    return df

def generate_umap_coordinates(df, umap_feats, log_feats, n_neigh=15, min_d=0.1, rand_state=42):
    """Compute a 2-D UMAP embedding and store it in ``df`` as ``umap_x`` / ``umap_y``.

    Args:
        df: Per-cell feature table; modified in place and returned.
        umap_feats: Candidate feature columns; names missing from ``df`` are
            ignored.
        log_feats: Columns that get ``log1p`` before scaling.
        n_neigh: Requested UMAP neighbourhood size, capped at ``n_samples - 1``.
        min_d: UMAP ``min_dist`` parameter.
        rand_state: Seed forwarded to UMAP.

    Returns:
        ``df`` with ``umap_x`` and ``umap_y``. Both are NaN (with a printed
        reason) when no feature is available, when there are too few samples,
        or when NaNs survive mean imputation.
    """
    print("\n--- Generating UMAP Coordinates ---")

    def _without_embedding(reason):
        # Every failure path reports why and returns NaN coordinates.
        print(f"  Skipping UMAP: {reason}")
        df['umap_x'], df['umap_y'] = np.nan, np.nan
        return df

    actual_umap_feats = [f for f in umap_feats if f in df.columns]
    if not actual_umap_feats:
        return _without_embedding("none of the requested features are present.")

    # Check the sample count before scaling: the scaler rejects an empty
    # table, which would otherwise surface as an exception instead of a skip.
    n_samples = len(df)
    actual_n_neigh = min(n_neigh, n_samples - 1)
    if actual_n_neigh < 2:
        return _without_embedding(f"not enough samples ({n_samples}).")

    umap_df = df[actual_umap_feats].copy()
    for col in log_feats:
        if col in umap_df.columns:
            umap_df[col] = np.log1p(umap_df[col])
    if umap_df.isnull().values.any():
        umap_df = umap_df.fillna(umap_df.mean())
    # An all-NaN column has a NaN mean, so NaNs can survive the fill.
    if umap_df.isnull().values.any():
        return _without_embedding("NaN values remain after mean imputation.")

    scaled_feats = StandardScaler().fit_transform(umap_df)
    embedding = umap.UMAP(n_neighbors=actual_n_neigh, min_dist=min_d, random_state=rand_state).fit_transform(scaled_feats)
    df['umap_x'], df['umap_y'] = embedding[:, 0], embedding[:, 1]
    print("UMAP coordinates generated.")
    return df

def visualize_results(df_viz, out_dir):
    """Save UMAP scatter plots coloured by the binary and the granular labels.

    Args:
        df_viz: Table with ``umap_x`` / ``umap_y`` and, optionally,
            ``cell_type`` and ``rule_based_classification_granular``.
        out_dir: Directory receiving ``umap_rule_based_binary.png`` and
            ``umap_rule_based_granular.png``.

    Returns:
        None. Nothing is plotted when the table is empty or has no usable
        UMAP coordinates.
    """
    print("\n--- Creating Visualizations (Rule-Based) ---")
    if df_viz.empty or 'umap_x' not in df_viz or df_viz['umap_x'].isnull().all():
        print("Skipping UMAP plots.")
        return

    # ---- Binary plot ----
    plt.figure(figsize=(10, 8))
    if 'cell_type' in df_viz and df_viz['cell_type'].notna().any():
        sns.scatterplot(x='umap_x', y='umap_y', hue='cell_type',
                        palette={'Senescent': 'red', 'Non-senescent': 'blue', 'Unknown_Rules_Missing': 'grey'},
                        data=df_viz, s=30, alpha=0.7, edgecolor='k', linewidth=0.5)
        plt.legend(title='Cell Type (Rule-Based)')
    else:
        plt.scatter(df_viz['umap_x'], df_viz['umap_y'], s=30, alpha=0.5)
    plt.title('UMAP by Rule-Based Classification (Binary)')
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.grid(True, alpha=0.3)
    plt.savefig(os.path.join(out_dir, 'umap_rule_based_binary.png'), dpi=300, bbox_inches='tight')
    plt.close()
    print("Binary UMAP saved.")

    # ---- Granular plot ----
    granular_col = 'rule_based_classification_granular'
    if granular_col not in df_viz or not df_viz[granular_col].notna().any():
        return

    plt.figure(figsize=(12, 10))
    # Drop NaN first: a NaN among string labels cannot be sorted.
    labels = sorted(df_viz[granular_col].dropna().unique())

    # One colour per configured rule, fixed by the rule's position in
    # RULE_BASED_GATES so a rule keeps its colour even when other rules never
    # fire. Sizing the palette to the number of rules (previously one fewer)
    # stops the last rule from wrapping onto the first rule's colour.
    sen_rule_labels_ordered = [r['output_label'] for r in RULE_BASED_GATES
                               if r['output_label'] != RULE_BASED_DEFAULT_LABEL]
    sen_cols = sns.color_palette("coolwarm_r", n_colors=max(1, len(sen_rule_labels_ordered)))
    rule_colours = dict(zip(sen_rule_labels_ordered, sen_cols))

    # Default label -> blue; labels unknown to the configuration -> grey, so
    # the palette never contains an undefined colour.
    pal = {lbl: ('blue' if lbl == RULE_BASED_DEFAULT_LABEL else rule_colours.get(lbl, 'grey'))
           for lbl in labels}

    sns.scatterplot(x='umap_x', y='umap_y', hue=granular_col, palette=pal, hue_order=labels,
                    data=df_viz, s=30, alpha=0.7, edgecolor='k', linewidth=0.5)
    plt.title('UMAP by Granular Rule Classification')
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.grid(True, alpha=0.3)
    # The legend is placed outside the axes; tight_layout leaves room for it.
    plt.legend(title='Rule Applied', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout(rect=[0, 0, 0.85, 1])
    plt.savefig(os.path.join(out_dir, 'umap_rule_based_granular.png'), dpi=300)
    plt.close()
    print("Granular UMAP saved.")

def save_results(df_proc, out_dir):
    """Write the classified cell table, a per-sample summary and a bar plot.

    Files written into ``out_dir`` (created if it does not exist):

    * ``cell_classification_rule_based_full.csv`` - ``df_proc`` without its index.
    * ``sample_summary_rule_based.csv`` - per ``sample_id`` counts of every
      ``cell_type`` value, plus ``total_cells`` and ``percent_senescent``.
    * ``senescent_percentage_rule_based.png`` - bar plot of
      ``percent_senescent`` per sample.

    The summary and the plot are only produced when ``df_proc`` has both a
    ``sample_id`` and a ``cell_type`` column.

    Args:
        df_proc: DataFrame of classified cells. Nothing is written when empty.
        out_dir: Destination directory.

    Returns:
        None.
    """
    if df_proc.empty:
        print("No data to save.")
        return

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)

    df_proc.to_csv(os.path.join(out_dir, 'cell_classification_rule_based_full.csv'), index=False)
    print("Full rule-based results saved.")

    if 'sample_id' not in df_proc or 'cell_type' not in df_proc:
        return

    summary = df_proc.groupby('sample_id')['cell_type'].value_counts(normalize=False).unstack(fill_value=0)

    # Guarantee the expected categories exist as columns even when no cell
    # of that type occurred, so downstream column access never fails.
    for col in ('Senescent', 'Non-senescent', 'Unknown_Rules_Missing'):
        if col not in summary:
            summary[col] = 0

    summary['total_cells'] = summary.sum(axis=1)

    # Row-wise guard: a sample with zero cells yields 0 instead of NaN/inf.
    totals = summary['total_cells']
    summary['percent_senescent'] = (summary['Senescent'] / totals.where(totals > 0) * 100).fillna(0)

    summary.to_csv(os.path.join(out_dir, 'sample_summary_rule_based.csv'))
    print("Rule-based sample summary saved.")

    # Plot from the reset frame so both axes come from the same data source.
    plt.figure(figsize=(12, 6))
    sns.barplot(x='sample_id', y='percent_senescent', data=summary.reset_index(), color='red')
    plt.title('Senescent Cells by Sample (Rules)')
    plt.xticks(rotation=90, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, 'senescent_percentage_rule_based.png'), dpi=300)
    plt.close()
    print("Senescent percentage plot saved.")

def _overlay_color_for_label(granular_label):
    """Return the fill colour for one granular rule label (a string)."""
    if granular_label == POLYNUCLEATED_SENESCENT_RULE_LABEL:
        return COLOR_SENESCENT_POLYNUCLEATED
    if granular_label.startswith('Rule_Sen_'):
        return COLOR_SENESCENT_OTHER_RULES
    if granular_label == RULE_BASED_DEFAULT_LABEL:
        return COLOR_NON_SENESCENT
    return COLOR_UNKNOWN


def visualize_enhanced_classification_on_masks(df_results, cell_mask_dir_path, nuclei_mask_dir_path, output_dir_masks_path):
    """Render one colour-coded overlay PNG per sample from its labelled masks.

    For every ``sample_id`` in ``df_results`` the matching cell mask is
    loaded, each labelled cell is filled with a colour chosen from its
    ``rule_based_classification_granular`` value, cell boundaries (and nuclei
    outlines, when a nuclei mask is found) are drawn on top, and the result
    is saved as ``<sample_id>_enhanced_mask_overlay.png`` with a legend.

    Cells are matched to rows through the key ``f"{sample_id}_{label}"``
    looked up in ``cell_id_unique``. Cells without a row, or whose label is
    not a string (e.g. NaN), are treated as ``'Unknown_In_CSV'``.

    Args:
        df_results: DataFrame with at least ``sample_id``, ``cell_id_unique``
            and ``rule_based_classification_granular`` columns.
        cell_mask_dir_path: Directory scanned for ``.tif``/``.tiff`` cell masks.
        nuclei_mask_dir_path: Directory scanned for ``.tif``/``.tiff`` nuclei masks.
        output_dir_masks_path: Directory the overlays are written to
            (created if missing).

    Returns:
        None. Problems with individual samples are printed and skipped.
    """
    print(f"\nGenerating enhanced classification overlays in: {output_dir_masks_path}")
    os.makedirs(output_dir_masks_path, exist_ok=True)

    # Use 'sample_id' for iterating samples, 'cell_id_unique' for lookup
    required_cols = ['sample_id', 'cell_id_unique', 'rule_based_classification_granular']
    missing = [col for col in required_cols if col not in df_results.columns]
    if missing:
        print(f"Error: DataFrame missing required columns for mask viz: {missing}.")
        return

    classification_lookup = dict(
        zip(df_results['cell_id_unique'], df_results['rule_based_classification_granular'])
    )

    unique_samples = df_results['sample_id'].unique()

    mask_extensions = ('.tif', '.tiff')
    available_cell_masks = {
        extract_sample_id(f): f for f in os.listdir(cell_mask_dir_path) if f.endswith(mask_extensions)
    }
    available_nuclei_masks = {
        extract_sample_id(f): f for f in os.listdir(nuclei_mask_dir_path) if f.endswith(mask_extensions)
    }

    for sample_id_val in tqdm(unique_samples, desc="Generating Enhanced Mask Overlays"):
        cell_mask_fname = available_cell_masks.get(sample_id_val)
        nuclei_mask_fname = available_nuclei_masks.get(sample_id_val)

        if not cell_mask_fname:
            print(f"  Warning: Cell mask file not found for sample ID: {sample_id_val}")
            continue

        print(f"\n  Processing sample for enhanced overlay: {sample_id_val}")
        labeled_cell_mask_img = load_mask_image(os.path.join(cell_mask_dir_path, cell_mask_fname))
        if labeled_cell_mask_img is None:
            continue

        overlay_img = np.zeros((labeled_cell_mask_img.shape[0], labeled_cell_mask_img.shape[1], 3), dtype=np.uint8)

        # Single pass over the regions: paint each cell and remember whether
        # any of them was unknown, so the legend does not need a second scan.
        is_unknown_present = False
        for cell_region_props in measure.regionprops(labeled_cell_mask_img):
            lookup_id = f"{sample_id_val}_{cell_region_props.label}"
            granular_status_label = classification_lookup.get(lookup_id, 'Unknown_In_CSV')
            if not isinstance(granular_status_label, str):
                # A missing value in the table would otherwise crash .startswith().
                granular_status_label = 'Unknown_In_CSV'
            if granular_status_label == 'Unknown_In_CSV':
                is_unknown_present = True

            # Index only this region's pixels instead of comparing the whole image.
            region_rows, region_cols = cell_region_props.coords.T
            overlay_img[region_rows, region_cols] = _overlay_color_for_label(granular_status_label)

        cell_boundaries_img = segmentation.find_boundaries(labeled_cell_mask_img, mode='outer', background=0)
        overlay_img[cell_boundaries_img] = COLOR_CELL_BOUNDARY

        labeled_nuc_mask_img = None
        if nuclei_mask_fname:
            labeled_nuc_mask_img = load_mask_image(os.path.join(nuclei_mask_dir_path, nuclei_mask_fname))
            if labeled_nuc_mask_img is not None:
                nuc_boundaries_img = segmentation.find_boundaries(labeled_nuc_mask_img, mode='inner', background=0)
                overlay_img[nuc_boundaries_img] = COLOR_NUCLEI_OUTLINE

        fig_plot = None
        try:
            h_img, w_img = overlay_img.shape[:2]
            fig_w_calc = max(10, w_img / 100 if w_img > 0 else 10)
            # The height is multiplied by the factor the width was enlarged by
            # relative to w_img / 100.
            fig_h_calc = max(8, h_img / 100 if h_img > 0 else 8) * (fig_w_calc / (w_img/100 if w_img > 0 else 1))

            fig_plot, ax_plot = plt.subplots(figsize=(fig_w_calc, fig_h_calc), dpi=100)
            ax_plot.imshow(overlay_img)

            # Colours are stored as 0-255 values; matplotlib expects 0-1 floats.
            legend_handles = [
                mpatches.Patch(color=np.array(COLOR_NON_SENESCENT)/255., label=f"{RULE_BASED_DEFAULT_LABEL} (Non-Senescent)"),
                mpatches.Patch(color=np.array(COLOR_SENESCENT_OTHER_RULES)/255., label='Senescent (Other Rules)'),
                mpatches.Patch(color=np.array(COLOR_SENESCENT_POLYNUCLEATED)/255., label=f'Senescent ({POLYNUCLEATED_SENESCENT_RULE_LABEL})')
            ]
            if is_unknown_present:
                legend_handles.append(mpatches.Patch(color=np.array(COLOR_UNKNOWN)/255., label='Unknown/Not in CSV'))
            if nuclei_mask_fname and labeled_nuc_mask_img is not None:
                legend_handles.append(mpatches.Patch(color=np.array(COLOR_NUCLEI_OUTLINE)/255., label='Nuclei Outline'))
            legend_handles.append(mpatches.Patch(edgecolor=np.array(COLOR_CELL_BOUNDARY)/255., facecolor='none', label='Cell Boundary', linewidth=1))

            ax_plot.legend(handles=legend_handles, loc='center left', bbox_to_anchor=(1.02, 0.5), fontsize='small', title="Legend")
            ax_plot.axis('off')
            # Leave room on the right for the legend placed outside the axes.
            fig_plot.tight_layout(rect=[0, 0, 0.83, 1])

            output_img_filename = os.path.join(output_dir_masks_path, f"{sample_id_val}_enhanced_mask_overlay.png")
            fig_plot.savefig(output_img_filename, dpi=200)
            print(f"    Saved enhanced overlay for {sample_id_val} to {output_img_filename}")
        except Exception as e_plt:
            # Best effort per sample: report and continue with the next one.
            print(f"    Error during plotting/saving mask overlay for {sample_id_val}: {e_plt}")
        finally:
            if fig_plot is not None:
                plt.close(fig_plot)

    print("\nEnhanced mask overlay visualization complete.")


def main(base_dir, cell_subdir, nuclei_subdir, output_parent_dir):
    """Run the rule-based senescence pipeline for one dataset folder.

    Steps, in order: pair cell/nuclei mask files, extract per-cell features
    for every pair, compute the senescence score and UMAP coordinates, apply
    the rule-based classification, save the tables/plots, and render the
    per-sample mask overlays.

    Results are written to
    ``<output_parent_dir>/<dataset folder name>/Senescence_Results``.

    Args:
        base_dir: Dataset folder containing the mask sub-directories. A
            trailing path separator is tolerated.
        cell_subdir: Name of the cell-mask sub-directory inside ``base_dir``.
        nuclei_subdir: Name of the nuclei-mask sub-directory inside ``base_dir``.
        output_parent_dir: Parent directory for analysis output.

    Returns:
        None. Returns early when no file pairs or no features are found.
    """
    import traceback

    print(f"=== Senescence Analysis (Rule-Based) for {base_dir} ===")
    cell_mask_dir_path = os.path.join(base_dir, cell_subdir)
    nuclei_mask_dir_path = os.path.join(base_dir, nuclei_subdir)

    # basename() of a path ending in a separator is '', which would silently
    # drop the dataset folder from the output path; normalise first.
    dataset_folder_name = os.path.basename(os.path.normpath(base_dir))
    main_output_dir = os.path.join(output_parent_dir, dataset_folder_name, "Senescence_Results")
    if not os.path.isdir(main_output_dir):
        os.makedirs(main_output_dir, exist_ok=True)
        print(f"Created: {main_output_dir}")

    file_pairs_list = find_mask_files(cell_mask_dir_path, nuclei_mask_dir_path)
    if not file_pairs_list:
        print("No file pairs. Exiting.")
        return

    features_list = []
    for pair_info in tqdm(file_pairs_list, desc="Processing image pairs"):
        print(f"\nProcessing sample: {pair_info['sample_id']}")
        try:
            cm_img = load_mask_image(pair_info['cell_file'])
            nm_img = load_mask_image(pair_info['nuclei_file'])
            if cm_img is None or nm_img is None:
                print(f"Skipping {pair_info['sample_id']}.")
                continue

            track_res = accurately_track_nuclei_in_cells(cm_img, nm_img)
            for cell_d in track_res['cell_data']:
                cell_feats = extract_features_for_cell(cell_d, track_res['cell_nuclei_mapping'], track_res['nuclei_data'])
                # Same key format the mask overlay step uses to look cells up.
                cell_feats['cell_id_unique'] = f"{pair_info['sample_id']}_{cell_d['cell_id']}"
                cell_feats['sample_id'] = pair_info['sample_id']
                cell_feats['original_cell_label'] = cell_d['cell_id']
                features_list.append(cell_feats)
        except Exception as e_proc:
            # Best effort: one failing sample must not abort the whole run.
            print(f"Error processing {pair_info['sample_id']}: {e_proc}")
            traceback.print_exc()

    if not features_list:
        print("No features extracted.")
        return
    master_df = pd.DataFrame(features_list)
    print(f"\nTotal cells extracted: {len(master_df)}")

    master_df = calculate_senescence_score(master_df, SENESCENCE_SCORE_WEIGHTS, AREA_FEATURES_TO_LOG)
    master_df = generate_umap_coordinates(master_df, FEATURES_FOR_UMAP, AREA_FEATURES_TO_LOG)
    final_classified_df = apply_rule_based_classification(master_df, RULE_BASED_GATES, RULE_BASED_DEFAULT_LABEL)

    save_results(final_classified_df, main_output_dir)
    visualize_results(final_classified_df, main_output_dir)

    enhanced_masks_output_path = os.path.join(main_output_dir, MASK_VISUALIZATION_SUBDIR)
    visualize_enhanced_classification_on_masks(final_classified_df, cell_mask_dir_path, nuclei_mask_dir_path, enhanced_masks_output_path)

    print("\nRule-Based Analysis and Mask Visualization complete!")

if __name__ == "__main__":
    # Root folders for segmented input and analysis output.
    segmented_data_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented"
    analysis_output_parent_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis"

    # Dataset to analyse and the mask sub-directories inside it.
    current_dataset_folder_name = "Static-x20"
    cell_masks_subdir = "Cell_merged_conservative"
    nuclei_masks_subdir = "Nuclei"

    base_input_dir = os.path.join(segmented_data_parent_dir, current_dataset_folder_name)

    # Only start the pipeline when the dataset folder actually exists.
    if os.path.isdir(base_input_dir):
        main(base_input_dir, cell_masks_subdir, nuclei_masks_subdir, analysis_output_parent_dir)
    else:
        print(f"Error: Base input directory not found: {base_input_dir}")


=== Senescence Analysis (Rule-Based) for /content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/Static-x20 ===

--- Finding and Pairing Mask Files ---
Found 23 cell files, 23 nuclei files
Total matching pairs: 23


Processing image pairs:   0%|          | 0/23 [00:00<?, ?it/s]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001
Found 304 cells, 318 nuclei.


Processing image pairs:   4%|▍         | 1/23 [00:03<01:08,  3.13s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002
Found 234 cells, 252 nuclei.


Processing image pairs:   9%|▊         | 2/23 [00:05<00:52,  2.52s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003
Found 310 cells, 332 nuclei.


Processing image pairs:  13%|█▎        | 3/23 [00:07<00:47,  2.35s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004
Found 303 cells, 316 nuclei.


Processing image pairs:  17%|█▋        | 4/23 [00:12<01:08,  3.63s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005
Found 236 cells, 253 nuclei.


Processing image pairs:  22%|██▏       | 5/23 [00:14<00:48,  2.71s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006
Found 262 cells, 271 nuclei.


Processing image pairs:  26%|██▌       | 6/23 [00:15<00:37,  2.20s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007
Found 214 cells, 233 nuclei.


Processing image pairs:  30%|███       | 7/23 [00:16<00:29,  1.83s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008
Found 234 cells, 254 nuclei.


Processing image pairs:  35%|███▍      | 8/23 [00:17<00:24,  1.61s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009
Found 283 cells, 296 nuclei.


Processing image pairs:  39%|███▉      | 9/23 [00:18<00:21,  1.51s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010
Found 210 cells, 225 nuclei.


Processing image pairs:  43%|████▎     | 10/23 [00:19<00:17,  1.37s/it]


Processing sample: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011
Found 261 cells, 278 nuclei.


Processing image pairs:  48%|████▊     | 11/23 [00:21<00:16,  1.34s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001
Found 217 cells, 241 nuclei.


Processing image pairs:  52%|█████▏    | 12/23 [00:22<00:13,  1.24s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002
Found 287 cells, 315 nuclei.


Processing image pairs:  57%|█████▋    | 13/23 [00:23<00:13,  1.36s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003
Found 284 cells, 313 nuclei.


Processing image pairs:  61%|██████    | 14/23 [00:25<00:13,  1.50s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004
Found 222 cells, 280 nuclei.


Processing image pairs:  65%|██████▌   | 15/23 [00:27<00:12,  1.55s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005
Found 196 cells, 238 nuclei.


Processing image pairs:  70%|██████▉   | 16/23 [00:28<00:09,  1.41s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006
Found 273 cells, 332 nuclei.


Processing image pairs:  74%|███████▍  | 17/23 [00:29<00:08,  1.42s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007
Found 223 cells, 270 nuclei.


Processing image pairs:  78%|███████▊  | 18/23 [00:30<00:06,  1.34s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008
Found 268 cells, 306 nuclei.


Processing image pairs:  83%|████████▎ | 19/23 [00:32<00:05,  1.34s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009
Found 304 cells, 318 nuclei.


Processing image pairs:  87%|████████▋ | 20/23 [00:33<00:04,  1.35s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010
Found 183 cells, 190 nuclei.


Processing image pairs:  91%|█████████▏| 21/23 [00:34<00:02,  1.22s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011
Found 216 cells, 230 nuclei.


Processing image pairs:  96%|█████████▌| 22/23 [00:35<00:01,  1.16s/it]


Processing sample: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012
Found 215 cells, 232 nuclei.


Processing image pairs: 100%|██████████| 23/23 [00:36<00:00,  1.59s/it]
  warn(



Total cells extracted: 5739

Calculating senescence score...
Senescence score calculation complete.

--- Generating UMAP Coordinates ---
UMAP coordinates generated.

--- Applying Rule-Based Classification ---
  Applying rule: Polynucleated
    386 cells labeled as 'Rule_Sen_Poly'.
  Applying rule: Very_Large_Cell
    1322 cells labeled as 'Rule_Sen_VeryLarge'.
  Applying rule: Low_Circularity
    21 cells labeled as 'Rule_Sen_LowCirc'.
  Applying rule: Low_NucToCellRatio
    46 cells labeled as 'Rule_Sen_LowNucRatio'.
  Applying rule: High_Score_Not_Otherwise_Caught
    0 cells labeled as 'Rule_Sen_HighScore'.

Granular counts:
rule_based_classification_granular
Rule_NonSenescent       3964
Rule_Sen_VeryLarge      1322
Rule_Sen_Poly            386
Rule_Sen_LowNucRatio      46
Rule_Sen_LowCirc          21
Name: count, dtype: int64

Binary 'cell_type' counts:
cell_type
Non-senescent    3964
Senescent        1775
Name: count, dtype: int64
Full rule-based results saved.
Rule-based sample 

Generating Enhanced Mask Overlays:   0%|          | 0/23 [00:00<?, ?it/s]


  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001


Generating Enhanced Mask Overlays:   4%|▍         | 1/23 [00:01<00:39,  1.81s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq001_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002


Generating Enhanced Mask Overlays:   9%|▊         | 2/23 [00:03<00:33,  1.58s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq002_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003


Generating Enhanced Mask Overlays:  13%|█▎        | 3/23 [00:04<00:32,  1.62s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq003_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004


Generating Enhanced Mask Overlays:  17%|█▋        | 4/23 [00:06<00:30,  1.62s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq004_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005


Generating Enhanced Mask Overlays:  22%|██▏       | 5/23 [00:07<00:27,  1.55s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq005_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006


Generating Enhanced Mask Overlays:  26%|██▌       | 6/23 [00:09<00:26,  1.54s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq006_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007


Generating Enhanced Mask Overlays:  30%|███       | 7/23 [00:10<00:24,  1.52s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq007_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008


Generating Enhanced Mask Overlays:  35%|███▍      | 8/23 [00:13<00:25,  1.73s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq008_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009


Generating Enhanced Mask Overlays:  39%|███▉      | 9/23 [00:15<00:25,  1.83s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq009_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010


Generating Enhanced Mask Overlays:  43%|████▎     | 10/23 [00:16<00:21,  1.69s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq010_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011


Generating Enhanced Mask Overlays:  48%|████▊     | 11/23 [00:18<00:19,  1.63s/it]

    Saved enhanced overlay for 0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_19dec21_20xA_L2RA_FlatA_seq011_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001


Generating Enhanced Mask Overlays:  52%|█████▏    | 12/23 [00:19<00:17,  1.55s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq001_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002


Generating Enhanced Mask Overlays:  57%|█████▋    | 13/23 [00:20<00:15,  1.55s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq002_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003


Generating Enhanced Mask Overlays:  61%|██████    | 14/23 [00:22<00:14,  1.57s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq003_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004


Generating Enhanced Mask Overlays:  65%|██████▌   | 15/23 [00:23<00:12,  1.52s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq004_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005


Generating Enhanced Mask Overlays:  70%|██████▉   | 16/23 [00:25<00:10,  1.56s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq005_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006


Generating Enhanced Mask Overlays:  74%|███████▍  | 17/23 [00:27<00:10,  1.79s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq006_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007


Generating Enhanced Mask Overlays:  78%|███████▊  | 18/23 [00:29<00:08,  1.76s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq007_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008


Generating Enhanced Mask Overlays:  83%|████████▎ | 19/23 [00:31<00:06,  1.69s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq008_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009


Generating Enhanced Mask Overlays:  87%|████████▋ | 20/23 [00:32<00:05,  1.67s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq009_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010


Generating Enhanced Mask Overlays:  91%|█████████▏| 21/23 [00:34<00:03,  1.57s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq010_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011


Generating Enhanced Mask Overlays:  96%|█████████▌| 22/23 [00:35<00:01,  1.51s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq011_enhanced_mask_overlay.png

  Processing sample for enhanced overlay: 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012


Generating Enhanced Mask Overlays: 100%|██████████| 23/23 [00:36<00:00,  1.60s/it]

    Saved enhanced overlay for 0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012 to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/mask_overlays_enhanced/0Pa_A1_20dec21_20xA_L2RA_FlatA_seq012_enhanced_mask_overlay.png

Enhanced mask overlay visualization complete.

Rule-Based Analysis and Mask Visualization complete!



