In [2]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, measure
from skimage.morphology import thin
from skimage.measure import regionprops
from scipy import ndimage
from scipy.ndimage import label
import tifffile
from pathlib import Path
import seaborn as sns
from scipy import stats

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/flow3_1.4Pa_18h"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h"

# Create output directory structure
os.makedirs(output_dir, exist_ok=True)
for component in ['Nuclei', 'Cell', 'Membrane', 'Membrane_Adjusted']:
    os.makedirs(os.path.join(output_dir, component), exist_ok=True)

# Extract sample info from filenames
def extract_sample_info(filename):
    """
    Extract sample ID, pressure and component type from filename
    Example: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_cell_mask
    """
    parts = str(filename).split('_')

    # Extract pressure (0Pa or 1.4Pa)
    pressure = None
    for part in parts:
        if 'Pa' in part:
            pressure = part

    # Extract component type and everything before it for sample_id
    component = None
    component_idx = None

    components = ['Nuclei', 'cell', 'Cadherins', 'membrane']
    for i, part in enumerate(parts):
        if part in components:
            component = part
            component_idx = i
            break

    if component_idx is not None:
        sample_id = '_'.join(parts[:component_idx])

        # Map component names to standardized versions
        if component == 'Nuclei':
            component_type = 'nuclei'
        elif component == 'cell':
            component_type = 'cell'
        elif component == 'Cadherins':
            component_type = 'membrane'
        elif component == 'membrane':
            # Check if this is from the Membrane_Adjusted folder
            if 'adjusted' in filename:
                component_type = 'membrane_adjusted'
            else:
                component_type = 'membrane'

        return {
            'sample_id': sample_id,
            'pressure': pressure,
            'component': component_type
        }

    return None

# Find all mask files
def find_mask_files(base_dir):
    """Find all mask files in the directory structure"""
    mask_files = []

    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('_mask.tif') or ('_membrane_mask_adjusted' in file):
                mask_files.append(os.path.join(root, file))

    return mask_files

# Extract morphological features from mask
def analyze_mask(mask_path):
    """Extract morphometric features from a binary mask"""
    try:
        # Read mask
        mask = tifffile.imread(mask_path)

        # Ensure binary mask
        if mask.dtype != bool:
            mask = mask > 0

        # Label connected components
        labeled_mask, num_features = ndimage.label(mask)

        if num_features == 0:
            print(f"No features found in {mask_path}")
            return None

        # Calculate region properties
        regions = regionprops(labeled_mask)

        # Get sample info from filename
        filename = os.path.basename(mask_path)
        sample_info = extract_sample_info(filename)

        if not sample_info:
            print(f"Could not extract sample info from {filename}")
            return None

        features_list = []

        # Extract features for each region in the mask
        for i, region in enumerate(regions):
            # Skip very small regions (likely noise)
            if region.area < 10:
                continue

            # Calculate additional features
            # Circularity = 4π * area / perimeter²
            circularity = 4 * np.pi * region.area / (region.perimeter ** 2) if region.perimeter > 0 else 0

            # Elongation = major_axis_length / minor_axis_length
            elongation = region.major_axis_length / region.minor_axis_length if region.minor_axis_length > 0 else 0

            features = {
                'sample_id': sample_info['sample_id'],
                'pressure': sample_info['pressure'],
                'component': sample_info['component'],
                'region_id': i,
                # Size and shape features
                'area': region.area,
                'perimeter': region.perimeter,
                'equivalent_diameter': region.equivalent_diameter,
                # Elongation features
                'major_axis_length': region.major_axis_length,
                'minor_axis_length': region.minor_axis_length,
                'elongation': elongation,
                'eccentricity': region.eccentricity,
                # Orientation features
                'orientation_degrees': np.degrees(region.orientation),
                # Other shape descriptors
                'solidity': region.solidity,
                'circularity': circularity
            }

            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error analyzing mask {mask_path}: {e}")
        return None

# Main analysis function
def analyze_morphology(base_dir, output_dir):
    """Analyze morphological features of all masks and compare between pressures"""
    print("Starting morphological analysis...")

    # Step 1: Find all mask files
    mask_files = find_mask_files(base_dir)
    print(f"Found {len(mask_files)} mask files")

    # Step 2: Extract features from each mask
    all_features = []
    for file in mask_files:
        features = analyze_mask(file)
        if features:
            all_features.extend(features)

    # Step 3: Convert to DataFrame
    if not all_features:
        print("No features could be extracted. Check your mask files.")
        return

    df = pd.DataFrame(all_features)

    # Save raw data
    features_csv = os.path.join(output_dir, "morphological_features.csv")
    df.to_csv(features_csv, index=False)
    print(f"Saved features to {features_csv}")

    # Step 4: Analyze by component type (nuclei, cell, membrane)
    components = df['component'].unique()
    pressures = df['pressure'].unique()

    print(f"Analyzing components: {components}")
    print(f"Comparing pressures: {pressures}")

    # Features to analyze
    features_to_compare = [
        'area', 'perimeter', 'eccentricity', 'elongation',
        'major_axis_length', 'minor_axis_length',
        'orientation_degrees', 'solidity', 'circularity'
    ]

    # Loop through each component type
    for component in components:
        component_df = df[df['component'] == component]

        # Skip if not enough data
        if len(component_df) < 5:
            print(f"Not enough data for {component} analysis")
            continue

        component_dir = os.path.join(output_dir, component.capitalize())

        # Create boxplots for each feature
        for feature in features_to_compare:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 6))
            ax = sns.boxplot(x='pressure', y=feature, data=component_df)

            # Add statistical comparison
            if len(pressures) == 2:
                # Perform t-test between groups
                group1 = component_df[component_df['pressure'] == pressures[0]][feature].dropna()
                group2 = component_df[component_df['pressure'] == pressures[1]][feature].dropna()

                if len(group1) > 0 and len(group2) > 0:
                    stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)

                    # Add significance annotation
                    sig_text = f'p = {p_value:.3f}'
                    if p_value < 0.001:
                        sig_text += ' ***'
                    elif p_value < 0.01:
                        sig_text += ' **'
                    elif p_value < 0.05:
                        sig_text += ' *'

                    plt.annotate(sig_text, xy=(0.5, 0.95), xycoords='axes fraction',
                                ha='center', va='center',
                                bbox=dict(boxstyle='round', fc='white', alpha=0.7))

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure')
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_boxplot.png"), dpi=300)
            plt.close()

        # Create scatter plots for feature relationships
        scatter_relationships = [
            ('area', 'perimeter'),
            ('major_axis_length', 'minor_axis_length'),
            ('eccentricity', 'circularity'),
            ('eccentricity', 'orientation_degrees'),
            ('elongation', 'area')
        ]

        for x_feature, y_feature in scatter_relationships:
            if x_feature not in component_df.columns or y_feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 8))
            sns.scatterplot(x=x_feature, y=y_feature, hue='pressure',
                         data=component_df, alpha=0.6, palette='viridis')

            # Add regression lines for each pressure
            for pressure in pressures:
                pressure_data = component_df[component_df['pressure'] == pressure]
                sns.regplot(x=x_feature, y=y_feature, data=pressure_data,
                          scatter=False, label=f'{pressure} trend')

            plt.title(f'{component.capitalize()}: {y_feature.replace("_", " ").title()} vs {x_feature.replace("_", " ").title()}')
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{y_feature}_vs_{x_feature}_scatter.png"), dpi=300)
            plt.close()

        # Create histograms for key features
        for feature in ['area', 'elongation', 'circularity', 'orientation_degrees']:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(12, 6))

            for pressure in pressures:
                data = component_df[component_df['pressure'] == pressure][feature].dropna()
                if len(data) > 0:
                    sns.histplot(data, kde=True, label=pressure, alpha=0.6)

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution')
            plt.xlabel(feature.replace("_", " ").title())
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)
            plt.close()

    # Create summary statistics
    summary = df.groupby(['component', 'pressure']).agg({
        'area': ['mean', 'std', 'median'],
        'perimeter': ['mean', 'std', 'median'],
        'elongation': ['mean', 'std', 'median'],
        'eccentricity': ['mean', 'std', 'median'],
        'circularity': ['mean', 'std', 'median'],
        'orientation_degrees': ['mean', 'std', 'median'],
    }).reset_index()

    summary.to_csv(os.path.join(output_dir, "morphology_summary_statistics.csv"))

    # Create summary visualization
    summary_features = ['area', 'elongation', 'circularity', 'orientation_degrees']

    fig, axs = plt.subplots(len(components), len(summary_features),
                          figsize=(5*len(summary_features), 4*len(components)))

    for i, comp in enumerate(components):
        for j, feat in enumerate(summary_features):
            # Handle both 1D and 2D axis arrays
            if len(components) == 1:
                ax = axs[j]
            else:
                ax = axs[i, j]

            comp_data = df[df['component'] == comp]
            if len(comp_data) > 0:
                sns.boxplot(x='pressure', y=feat, data=comp_data, ax=ax)
                ax.set_title(f'{comp.capitalize()} {feat.replace("_", " ").title()}')
                ax.set_xlabel('')
                if j == 0:
                    ax.set_ylabel(comp.capitalize())
                else:
                    ax.set_ylabel('')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "morphology_summary.png"), dpi=300)
    plt.close()

    print("Morphological analysis complete! Results saved to:", output_dir)

# Run the analysis
analyze_morphology(base_dir, output_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting morphological analysis...
Found 32 mask files
Saved features to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h/morphological_features.csv
Analyzing components: ['nuclei' 'membrane' 'cell']
Comparing pressures: ['0Pa' '1.4Pa']


  plt.tight_layout()
  plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Morphological analysis complete! Results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h


In [3]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, measure
from skimage.morphology import thin
from skimage.measure import regionprops
from scipy import ndimage
from scipy.ndimage import label
import tifffile
from pathlib import Path
import seaborn as sns
from scipy import stats

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/flow3_1.4Pa_18h"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h"

# Create output directory structure
os.makedirs(output_dir, exist_ok=True)
for component in ['Nuclei', 'Cell', 'Membrane', 'Membrane_Adjusted']:
    os.makedirs(os.path.join(output_dir, component), exist_ok=True)

# Extract sample info from filenames
def extract_sample_info(filename):
    """
    Extract sample ID, pressure and component type from filename
    Example: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_cell_mask
    """
    parts = str(filename).split('_')

    # Extract pressure (0Pa or 1.4Pa)
    pressure = None
    for part in parts:
        if 'Pa' in part:
            pressure = part

    # Extract component type and everything before it for sample_id
    component = None
    component_idx = None

    components = ['Nuclei', 'cell', 'Cadherins', 'membrane']
    for i, part in enumerate(parts):
        if part in components:
            component = part
            component_idx = i
            break

    if component_idx is not None:
        sample_id = '_'.join(parts[:component_idx])

        # Map component names to standardized versions
        if component == 'Nuclei':
            component_type = 'nuclei'
        elif component == 'cell':
            component_type = 'cell'
        elif component == 'Cadherins':
            component_type = 'membrane'
        elif component == 'membrane':
            # Check if this is from the Membrane_Adjusted folder
            if 'adjusted' in filename:
                component_type = 'membrane_adjusted'
            else:
                component_type = 'membrane'

        return {
            'sample_id': sample_id,
            'pressure': pressure,
            'component': component_type
        }

    return None

# Find all mask files
def find_mask_files(base_dir):
    """Find all mask files in the directory structure"""
    mask_files = []

    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('_mask.tif') or ('_membrane_mask_adjusted' in file):
                mask_files.append(os.path.join(root, file))

    return mask_files

# Extract morphological features from mask
def analyze_mask(mask_path):
    """Extract morphometric features from a binary mask"""
    try:
        # Read mask
        mask = tifffile.imread(mask_path)

        # Ensure binary mask
        if mask.dtype != bool:
            mask = mask > 0

        # Label connected components
        labeled_mask, num_features = ndimage.label(mask)

        if num_features == 0:
            print(f"No features found in {mask_path}")
            return None

        # Calculate region properties
        regions = regionprops(labeled_mask)

        # Get sample info from filename
        filename = os.path.basename(mask_path)
        sample_info = extract_sample_info(filename)

        if not sample_info:
            print(f"Could not extract sample info from {filename}")
            return None

        features_list = []

        # Extract features for each region in the mask
        for i, region in enumerate(regions):
            # Skip very small regions (likely noise)
            if region.area < 10:
                continue

            # Calculate additional features
            # Circularity = 4π * area / perimeter²
            circularity = 4 * np.pi * region.area / (region.perimeter ** 2) if region.perimeter > 0 else 0

            # Elongation = major_axis_length / minor_axis_length
            elongation = region.major_axis_length / region.minor_axis_length if region.minor_axis_length > 0 else 0

            features = {
                'sample_id': sample_info['sample_id'],
                'pressure': sample_info['pressure'],
                'component': sample_info['component'],
                'region_id': i,
                # Size and shape features
                'area': region.area,
                'perimeter': region.perimeter,
                'equivalent_diameter': region.equivalent_diameter,
                # Elongation features
                'major_axis_length': region.major_axis_length,
                'minor_axis_length': region.minor_axis_length,
                'elongation': elongation,
                'eccentricity': region.eccentricity,
                # Orientation features
                'orientation_degrees': np.degrees(region.orientation),
                # Other shape descriptors
                'solidity': region.solidity,
                'circularity': circularity
            }

            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error analyzing mask {mask_path}: {e}")
        return None

# Main analysis function
def analyze_morphology(base_dir, output_dir):
    """Analyze morphological features of all masks and compare between pressures"""
    print("Starting morphological analysis...")

    # Step 1: Find all mask files
    mask_files = find_mask_files(base_dir)
    print(f"Found {len(mask_files)} mask files")

    # Step 2: Extract features from each mask
    all_features = []
    for file in mask_files:
        features = analyze_mask(file)
        if features:
            all_features.extend(features)

    # Step 3: Convert to DataFrame
    if not all_features:
        print("No features could be extracted. Check your mask files.")
        return

    df = pd.DataFrame(all_features)

    # Save raw data
    features_csv = os.path.join(output_dir, "morphological_features.csv")
    df.to_csv(features_csv, index=False)
    print(f"Saved features to {features_csv}")

    # Step 4: Analyze by component type (nuclei, cell, membrane)
    components = df['component'].unique()
    pressures = df['pressure'].unique()

    print(f"Analyzing components: {components}")
    print(f"Comparing pressures: {pressures}")

    # Features to analyze
    features_to_compare = [
        'area', 'perimeter', 'eccentricity', 'elongation',
        'major_axis_length', 'minor_axis_length',
        'orientation_degrees', 'solidity', 'circularity'
    ]

    # Loop through each component type
    for component in components:
        component_df = df[df['component'] == component]

        # Skip if not enough data
        if len(component_df) < 5:
            print(f"Not enough data for {component} analysis")
            continue

        component_dir = os.path.join(output_dir, component.capitalize())

        # Create boxplots for each feature
        for feature in features_to_compare:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 6))
            ax = sns.boxplot(x='pressure', y=feature, data=component_df)

            # Add statistical comparison
            if len(pressures) == 2:
                # Perform t-test between groups
                group1 = component_df[component_df['pressure'] == pressures[0]][feature].dropna()
                group2 = component_df[component_df['pressure'] == pressures[1]][feature].dropna()

                if len(group1) > 0 and len(group2) > 0:
                    stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)

                    # Add significance annotation
                    sig_text = f'p = {p_value:.3f}'
                    if p_value < 0.001:
                        sig_text += ' ***'
                    elif p_value < 0.01:
                        sig_text += ' **'
                    elif p_value < 0.05:
                        sig_text += ' *'

                    plt.annotate(sig_text, xy=(0.5, 0.95), xycoords='axes fraction',
                                ha='center', va='center',
                                bbox=dict(boxstyle='round', fc='white', alpha=0.7))

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure')
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_boxplot.png"), dpi=300)
            plt.close()

        # Create scatter plots for feature relationships
        scatter_relationships = [
            ('area', 'perimeter'),
            ('major_axis_length', 'minor_axis_length'),
            ('eccentricity', 'circularity'),
            ('eccentricity', 'orientation_degrees'),
            ('elongation', 'area')
        ]

        for x_feature, y_feature in scatter_relationships:
            if x_feature not in component_df.columns or y_feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 8))
            sns.scatterplot(x=x_feature, y=y_feature, hue='pressure',
                         data=component_df, alpha=0.6, palette='viridis')

            # Add regression lines for each pressure
            for pressure in pressures:
                pressure_data = component_df[component_df['pressure'] == pressure]
                sns.regplot(x=x_feature, y=y_feature, data=pressure_data,
                          scatter=False, label=f'{pressure} trend')

            plt.title(f'{component.capitalize()}: {y_feature.replace("_", " ").title()} vs {x_feature.replace("_", " ").title()}')
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{y_feature}_vs_{x_feature}_scatter.png"), dpi=300)
            plt.close()

        # Create histograms for key features
        for feature in ['area', 'elongation', 'circularity', 'orientation_degrees']:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(12, 6))

            for pressure in pressures:
                data = component_df[component_df['pressure'] == pressure][feature].dropna()
                if len(data) > 0:
                    sns.histplot(data, kde=True, label=pressure, alpha=0.6)

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution')
            plt.xlabel(feature.replace("_", " ").title())
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)
            plt.close()

    # Create summary statistics
    summary = df.groupby(['component', 'pressure']).agg({
        'area': ['mean', 'std', 'median'],
        'perimeter': ['mean', 'std', 'median'],
        'elongation': ['mean', 'std', 'median'],
        'eccentricity': ['mean', 'std', 'median'],
        'circularity': ['mean', 'std', 'median'],
        'orientation_degrees': ['mean', 'std', 'median'],
    }).reset_index()

    summary.to_csv(os.path.join(output_dir, "morphology_summary_statistics.csv"))

    # Create summary visualization
    summary_features = ['area', 'elongation', 'circularity', 'orientation_degrees']

    fig, axs = plt.subplots(len(components), len(summary_features),
                          figsize=(5*len(summary_features), 4*len(components)))

    for i, comp in enumerate(components):
        for j, feat in enumerate(summary_features):
            # Handle both 1D and 2D axis arrays
            if len(components) == 1:
                ax = axs[j]
            else:
                ax = axs[i, j]

            comp_data = df[df['component'] == comp]
            if len(comp_data) > 0:
                sns.boxplot(x='pressure', y=feat, data=comp_data, ax=ax)
                ax.set_title(f'{comp.capitalize()} {feat.replace("_", " ").title()}')
                ax.set_xlabel('')
                if j == 0:
                    ax.set_ylabel(comp.capitalize())
                else:
                    ax.set_ylabel('')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "morphology_summary.png"), dpi=300)
    plt.close()

    print("Morphological analysis complete! Results saved to:", output_dir)

# Run the analysis
analyze_morphology(base_dir, output_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting morphological analysis...
Found 32 mask files
Saved features to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h/morphological_features.csv
Analyzing components: ['nuclei' 'membrane' 'cell']
Comparing pressures: ['0Pa' '1.4Pa']


  plt.tight_layout()
  plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Morphological analysis complete! Results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h


In [4]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, measure
from skimage.morphology import thin
from skimage.measure import regionprops
from scipy import ndimage
from scipy.ndimage import label
import tifffile
from pathlib import Path
import seaborn as sns
from scipy import stats

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/flow3_1.4Pa_18h"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h"

# Create output directory structure
os.makedirs(output_dir, exist_ok=True)
for component in ['Nuclei', 'Cell', 'Membrane', 'Membrane_Adjusted']:
    os.makedirs(os.path.join(output_dir, component), exist_ok=True)

# Extract sample info from filenames
def extract_sample_info(filename):
    """
    Extract sample ID, pressure and component type from filename
    Example: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_cell_mask
    """
    parts = str(filename).split('_')

    # Extract pressure (0Pa or 1.4Pa)
    pressure = None
    for part in parts:
        if 'Pa' in part:
            pressure = part

    # Extract component type and everything before it for sample_id
    component = None
    component_idx = None

    components = ['Nuclei', 'cell', 'Cadherins', 'membrane']
    for i, part in enumerate(parts):
        if part in components:
            component = part
            component_idx = i
            break

    if component_idx is not None:
        sample_id = '_'.join(parts[:component_idx])

        # Map component names to standardized versions
        if component == 'Nuclei':
            component_type = 'nuclei'
        elif component == 'cell':
            component_type = 'cell'
        elif component == 'Cadherins':
            component_type = 'membrane'
        elif component == 'membrane':
            # Check if this is from the Membrane_Adjusted folder
            if 'adjusted' in filename:
                component_type = 'membrane_adjusted'
            else:
                component_type = 'membrane'

        return {
            'sample_id': sample_id,
            'pressure': pressure,
            'component': component_type
        }

    return None

# Find all mask files, prioritizing Membrane_Adjusted over Membrane
def find_mask_files(base_dir):
    """Find mask files, prioritizing Membrane_Adjusted over Membrane"""
    mask_files = []

    # Find all mask files in different categories
    all_mask_files = []
    membrane_files = []
    membrane_adjusted_files = []

    for root, dirs, files in os.walk(base_dir):
        folder_name = os.path.basename(root)

        for file in files:
            if file.endswith('_mask.tif') or ('_membrane_mask_adjusted' in file):
                file_path = os.path.join(root, file)

                # Categorize by folder
                if folder_name.lower() == 'membrane_adjusted':
                    membrane_adjusted_files.append(file_path)
                elif folder_name.lower() == 'membrane':
                    membrane_files.append(file_path)
                else:
                    all_mask_files.append(file_path)

    # Add all non-membrane files
    mask_files.extend(all_mask_files)

    # Add membrane_adjusted files if available, otherwise add membrane files
    if membrane_adjusted_files:
        print(f"Using {len(membrane_adjusted_files)} files from Membrane_Adjusted")
        mask_files.extend(membrane_adjusted_files)
    else:
        print(f"No Membrane_Adjusted files found, using {len(membrane_files)} files from Membrane")
        mask_files.extend(membrane_files)

    return mask_files

# Extract morphological features from mask
def analyze_mask(mask_path):
    """Extract morphometric features from a binary mask"""
    try:
        # Read mask
        mask = tifffile.imread(mask_path)

        # Ensure binary mask
        if mask.dtype != bool:
            mask = mask > 0

        # Label connected components
        labeled_mask, num_features = ndimage.label(mask)

        if num_features == 0:
            print(f"No features found in {mask_path}")
            return None

        # Calculate region properties
        regions = regionprops(labeled_mask)

        # Get sample info from filename
        filename = os.path.basename(mask_path)
        sample_info = extract_sample_info(filename)

        if not sample_info:
            print(f"Could not extract sample info from {filename}")
            return None

        features_list = []

        # Extract features for each region in the mask
        for i, region in enumerate(regions):
            # Skip very small regions (likely noise)
            if region.area < 10:
                continue

            # Calculate additional features
            # Circularity = 4π * area / perimeter²
            circularity = 4 * np.pi * region.area / (region.perimeter ** 2) if region.perimeter > 0 else 0

            # Elongation = major_axis_length / minor_axis_length
            elongation = region.major_axis_length / region.minor_axis_length if region.minor_axis_length > 0 else 0

            # Normalize orientation to 0-90 degrees (relevant to flow direction)
            # Original orientation from skimage is between -90 and 90 degrees
            orientation_deg = np.degrees(region.orientation)
            # Normalize to 0-90 degrees range
            norm_orientation = abs(orientation_deg)

            features = {
                'sample_id': sample_info['sample_id'],
                'pressure': sample_info['pressure'],
                'component': sample_info['component'],
                'region_id': i,
                # Size and shape features
                'area': region.area,
                'perimeter': region.perimeter,
                'equivalent_diameter': region.equivalent_diameter,
                # Elongation features
                'major_axis_length': region.major_axis_length,
                'minor_axis_length': region.minor_axis_length,
                'elongation': elongation,
                'eccentricity': region.eccentricity,
                # Orientation features
                'orientation_degrees': norm_orientation,  # Normalized to 0-90
                # Other shape descriptors
                'solidity': region.solidity,
                'circularity': circularity
            }

            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error analyzing mask {mask_path}: {e}")
        return None

# Main analysis function
def analyze_morphology(base_dir, output_dir):
    """Analyze morphological features of all masks and compare between pressures"""
    print("Starting morphological analysis...")

    # Step 1: Find all mask files
    mask_files = find_mask_files(base_dir)
    print(f"Found {len(mask_files)} mask files")

    # Step 2: Extract features from each mask
    all_features = []
    for file in mask_files:
        features = analyze_mask(file)
        if features:
            all_features.extend(features)

    # Step 3: Convert to DataFrame
    if not all_features:
        print("No features could be extracted. Check your mask files.")
        return

    df = pd.DataFrame(all_features)

    # Save raw data
    features_csv = os.path.join(output_dir, "morphological_features.csv")
    df.to_csv(features_csv, index=False)
    print(f"Saved features to {features_csv}")

    # Step 4: Analyze by component type (nuclei, cell, membrane)
    components = df['component'].unique()
    pressures = df['pressure'].unique()

    print(f"Analyzing components: {components}")
    print(f"Comparing pressures: {pressures}")

    # Features to analyze
    features_to_compare = [
        'area', 'perimeter', 'eccentricity', 'elongation',
        'major_axis_length', 'minor_axis_length',
        'orientation_degrees', 'solidity', 'circularity'
    ]

    # Loop through each component type
    for component in components:
        component_df = df[df['component'] == component]

        # Skip if not enough data
        if len(component_df) < 5:
            print(f"Not enough data for {component} analysis")
            continue

        component_dir = os.path.join(output_dir, component.capitalize())

        # Create violin plots for each feature
        for feature in features_to_compare:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 6))
            # Use violin plot instead of boxplot
            ax = sns.violinplot(x='pressure', y=feature, data=component_df)

            # Add statistical comparison
            if len(pressures) == 2:
                # Perform t-test between groups
                group1 = component_df[component_df['pressure'] == pressures[0]][feature].dropna()
                group2 = component_df[component_df['pressure'] == pressures[1]][feature].dropna()

                if len(group1) > 0 and len(group2) > 0:
                    stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)

                    # Add significance annotation
                    sig_text = f'p = {p_value:.3f}'
                    if p_value < 0.001:
                        sig_text += ' ***'
                    elif p_value < 0.01:
                        sig_text += ' **'
                    elif p_value < 0.05:
                        sig_text += ' *'

                    plt.annotate(sig_text, xy=(0.5, 0.95), xycoords='axes fraction',
                                ha='center', va='center',
                                bbox=dict(boxstyle='round', fc='white', alpha=0.7))

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure')
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_violinplot.png"), dpi=300)
            plt.close()

        # Create scatter plots for feature relationships
        scatter_relationships = [
            ('area', 'perimeter'),
            ('major_axis_length', 'minor_axis_length'),
            ('eccentricity', 'circularity'),
            ('eccentricity', 'orientation_degrees'),
            ('elongation', 'area')
        ]

        for x_feature, y_feature in scatter_relationships:
            if x_feature not in component_df.columns or y_feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 8))
            sns.scatterplot(x=x_feature, y=y_feature, hue='pressure',
                         data=component_df, alpha=0.6, palette='viridis')

            # Add regression lines for each pressure
            for pressure in pressures:
                pressure_data = component_df[component_df['pressure'] == pressure]
                sns.regplot(x=x_feature, y=y_feature, data=pressure_data,
                          scatter=False, label=f'{pressure} trend')

            plt.title(f'{component.capitalize()}: {y_feature.replace("_", " ").title()} vs {x_feature.replace("_", " ").title()}')
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{y_feature}_vs_{x_feature}_scatter.png"), dpi=300)
            plt.close()

        # Create histograms for key features
        for feature in ['area', 'elongation', 'circularity', 'orientation_degrees']:
            if feature not in component_df.columns:
                continue

            plt.figure(figsize=(12, 6))

            for pressure in pressures:
                data = component_df[component_df['pressure'] == pressure][feature].dropna()
                if len(data) > 0:
                    sns.histplot(data, kde=True, label=pressure, alpha=0.6)

            plt.title(f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution')
            plt.xlabel(feature.replace("_", " ").title())
            plt.legend()
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)
            plt.close()

    # Create summary statistics
    summary = df.groupby(['component', 'pressure']).agg({
        'area': ['mean', 'std', 'median'],
        'perimeter': ['mean', 'std', 'median'],
        'elongation': ['mean', 'std', 'median'],
        'eccentricity': ['mean', 'std', 'median'],
        'circularity': ['mean', 'std', 'median'],
        'orientation_degrees': ['mean', 'std', 'median'],
    }).reset_index()

    summary.to_csv(os.path.join(output_dir, "morphology_summary_statistics.csv"))

    # Create summary visualization
    summary_features = ['area', 'elongation', 'circularity', 'orientation_degrees']

    fig, axs = plt.subplots(len(components), len(summary_features),
                          figsize=(5*len(summary_features), 4*len(components)))

    for i, comp in enumerate(components):
        for j, feat in enumerate(summary_features):
            # Handle both 1D and 2D axis arrays
            if len(components) == 1:
                ax = axs[j]
            else:
                ax = axs[i, j]

            comp_data = df[df['component'] == comp]
            if len(comp_data) > 0:
                # Use violin plot instead of boxplot for summary plot
                sns.violinplot(x='pressure', y=feat, data=comp_data, ax=ax)
                ax.set_title(f'{comp.capitalize()} {feat.replace("_", " ").title()}')
                ax.set_xlabel('')
                if j == 0:
                    ax.set_ylabel(comp.capitalize())
                else:
                    ax.set_ylabel('')

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "morphology_summary.png"), dpi=300)
    plt.close()

    print("Morphological analysis complete! Results saved to:", output_dir)

# Run the analysis
analyze_morphology(base_dir, output_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting morphological analysis...
Using 8 files from Membrane_Adjusted
Found 24 mask files
Saved features to /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h/morphological_features.csv
Analyzing components: ['nuclei' 'cell' 'membrane']
Comparing pressures: ['0Pa' '1.4Pa']


  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  plt.tight_layout()
  plt.savefig(os.path.join(component_dir, f"{feature}_histogram.png"), dpi=300)


Morphological analysis complete! Results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h


In [5]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, measure
from skimage.morphology import thin
from skimage.measure import regionprops
from scipy import ndimage
from scipy.ndimage import label
import tifffile
from pathlib import Path
import seaborn as sns
from scipy import stats

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/flow3_1.4Pa_18h"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h"

# Create output directory structure
os.makedirs(output_dir, exist_ok=True)
for component in ['Nuclei', 'Cell', 'Membrane', 'Membrane_Adjusted']:
    os.makedirs(os.path.join(output_dir, component), exist_ok=True)

# Extract sample info from filenames
def extract_sample_info(filename):
    """
    Extract sample ID, pressure and component type from filename
    Example: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_cell_mask
    """
    parts = str(filename).split('_')

    # Extract pressure (0Pa or 1.4Pa)
    pressure = None
    for part in parts:
        if 'Pa' in part:
            pressure = part

    # Debug: print filename for troubleshooting
    print(f"Processing file: {filename}")

    # Check for cell mask specifically
    if '_cell_mask' in filename:
        return {
            'sample_id': '_'.join(parts[:-2]),  # everything before "cell_mask"
            'pressure': pressure,
            'component': 'cell'
        }

    # Extract component type and everything before it for sample_id
    component = None
    component_idx = None

    components = ['Nuclei', 'nuclei', 'cell', 'Cadherins', 'membrane']
    for i, part in enumerate(parts):
        if part.lower() in [c.lower() for c in components]:
            component = part
            component_idx = i
            break

    if component_idx is not None:
        sample_id = '_'.join(parts[:component_idx])

        # Map component names to standardized versions
        component_lower = component.lower()
        if component_lower == 'nuclei':
            component_type = 'nuclei'
        elif component_lower == 'cell':
            component_type = 'cell'
        elif component_lower == 'cadherins':
            component_type = 'membrane'
        elif component_lower == 'membrane':
            # Check if this is from the Membrane_Adjusted folder
            if 'adjusted' in filename:
                component_type = 'membrane_adjusted'
            else:
                component_type = 'membrane'

        return {
            'sample_id': sample_id,
            'pressure': pressure,
            'component': component_type
        }

    # If we got here, we couldn't identify the component
    print(f"WARNING: Could not extract component from {filename}")
    return None

# Find all mask files, prioritizing Membrane_Adjusted over Membrane
def find_mask_files(base_dir):
    """Find mask files by component type"""
    # Initialize dictionaries to store mask files by component
    mask_files_by_component = {
        'nuclei': [],
        'cell': [],
        'membrane': [],
        'membrane_adjusted': []
    }

    for root, dirs, files in os.walk(base_dir):
        folder_name = os.path.basename(root)

        for file in files:
            if file.endswith('_mask.tif') or ('_membrane_mask_adjusted' in file):
                file_path = os.path.join(root, file)

                # Try to determine component from filename
                sample_info = extract_sample_info(file)
                if sample_info and sample_info['component'] in mask_files_by_component:
                    mask_files_by_component[sample_info['component']].append(file_path)

    # Print summary
    for component, files in mask_files_by_component.items():
        print(f"Found {len(files)} {component} mask files")

    # Combine all files, but use membrane_adjusted instead of membrane if available
    all_mask_files = []
    all_mask_files.extend(mask_files_by_component['nuclei'])
    all_mask_files.extend(mask_files_by_component['cell'])

    # For membrane, prefer membrane_adjusted if available
    if mask_files_by_component['membrane_adjusted']:
        print("Using membrane_adjusted files (preferred)")
        all_mask_files.extend(mask_files_by_component['membrane_adjusted'])
    else:
        print("Using membrane files (fallback)")
        all_mask_files.extend(mask_files_by_component['membrane'])

    return all_mask_files, mask_files_by_component

# Extract morphological features from mask
def analyze_mask(mask_path):
    """Extract morphometric features from a binary mask"""
    try:
        # Read mask
        mask = tifffile.imread(mask_path)

        # Ensure binary mask
        if mask.dtype != bool:
            mask = mask > 0

        # Label connected components
        labeled_mask, num_features = ndimage.label(mask)

        if num_features == 0:
            print(f"No features found in {mask_path}")
            return None

        # Calculate region properties
        regions = regionprops(labeled_mask)

        # Get sample info from filename
        filename = os.path.basename(mask_path)
        sample_info = extract_sample_info(filename)

        if not sample_info:
            print(f"Could not extract sample info from {filename}")
            return None

        features_list = []

        # Extract features for each region in the mask
        for i, region in enumerate(regions):
            # Skip very small regions (likely noise)
            if region.area < 10:
                continue

            # Calculate additional features
            # Circularity = 4π * area / perimeter²
            circularity = 4 * np.pi * region.area / (region.perimeter ** 2) if region.perimeter > 0 else 0

            # Elongation = major_axis_length / minor_axis_length
            elongation = region.major_axis_length / region.minor_axis_length if region.minor_axis_length > 0 else 0

            # Normalize orientation to 0-90 degrees (relevant to flow direction)
            # Original orientation from skimage is between -90 and 90 degrees
            orientation_deg = np.degrees(region.orientation)
            # Normalize to 0-90 degrees range (absolute value)
            norm_orientation = abs(orientation_deg)

            features = {
                'sample_id': sample_info['sample_id'],
                'pressure': sample_info['pressure'],
                'component': sample_info['component'],
                'region_id': i,
                # Size and shape features
                'area': region.area,
                'perimeter': region.perimeter,
                'equivalent_diameter': region.equivalent_diameter,
                # Elongation features
                'major_axis_length': region.major_axis_length,
                'minor_axis_length': region.minor_axis_length,
                'elongation': elongation,
                'eccentricity': region.eccentricity,
                # Orientation features
                'orientation_degrees': norm_orientation,  # Normalized to 0-90
                # Other shape descriptors
                'solidity': region.solidity,
                'circularity': circularity
            }

            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error analyzing mask {mask_path}: {e}")
        return None

# Create normalized violin plot
def create_violin_plot(data, x, y, hue=None, title=None, output_path=None, normalize=False):
    """Create violin plot, with option for normalized values"""
    plt.figure(figsize=(10, 6))

    if normalize:
        # Normalize the data for plotting percentages
        y_data = data[y].copy()
        groups = data[x].unique()
        normalized_data = data.copy()

        for group in groups:
            group_data = y_data[data[x] == group]
            group_min = group_data.min()
            group_max = group_data.max()
            # Normalize to 0-100% scale
            normalized_data.loc[data[x] == group, y] = ((y_data[data[x] == group] - group_min) /
                                                       (group_max - group_min)) * 100 if (group_max - group_min) > 0 else 0

        # Use normalized data for plotting
        plot_data = normalized_data
        y_label = f"{y.replace('_', ' ').title()} (%)"
    else:
        # Use original data
        plot_data = data
        y_label = y.replace('_', ' ').title()

    # Create violin plot
    ax = sns.violinplot(x=x, y=y, data=plot_data, hue=hue)

    # Add statistical comparison if applicable
    pressures = plot_data[x].unique()
    if len(pressures) == 2:
        # Perform t-test between groups
        group1 = plot_data[plot_data[x] == pressures[0]][y].dropna()
        group2 = plot_data[plot_data[x] == pressures[1]][y].dropna()

        if len(group1) > 0 and len(group2) > 0:
            try:
                stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)

                # Add significance annotation
                sig_text = f'p = {p_value:.3f}'
                if p_value < 0.001:
                    sig_text += ' ***'
                elif p_value < 0.01:
                    sig_text += ' **'
                elif p_value < 0.05:
                    sig_text += ' *'

                plt.annotate(sig_text, xy=(0.5, 0.95), xycoords='axes fraction',
                            ha='center', va='center',
                            bbox=dict(boxstyle='round', fc='white', alpha=0.7))
            except Exception as e:
                plt.annotate(f"Stats error: {str(e)}", xy=(0.5, 0.95), xycoords='axes fraction',
                            ha='center', va='center',
                            bbox=dict(boxstyle='round', fc='white', alpha=0.7))

    plt.title(title if title else f'{y.replace("_", " ").title()} by {x.title()}')
    plt.ylabel(y_label)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300)

    plt.close()

# Create histogram with normalized option
def create_histogram(data, feature, hue, title=None, output_path=None, normalize=False):
    """Create histogram with normalized option"""
    plt.figure(figsize=(12, 6))

    groups = data[hue].unique()

    if normalize:
        # Create percentage-based histogram
        for group in groups:
            group_data = data[data[hue] == group][feature].dropna()
            if len(group_data) > 0:
                # Use density=True for percentage
                sns.histplot(group_data, kde=True, label=group, alpha=0.6, stat="density")
        plt.ylabel("Density")
    else:
        # Standard count histogram
        for group in groups:
            group_data = data[data[hue] == group][feature].dropna()
            if len(group_data) > 0:
                sns.histplot(group_data, kde=True, label=group, alpha=0.6)
        plt.ylabel("Count")

    plt.title(title if title else f'{feature.replace("_", " ").title()} Distribution')
    plt.xlabel(feature.replace("_", " ").title())
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300)

    plt.close()

# Main analysis function
def analyze_morphology(base_dir, output_dir):
    """Analyze morphological features of all masks and compare between pressures"""
    print("Starting morphological analysis...")

    # Step 1: Find all mask files by component
    mask_files, mask_files_by_component = find_mask_files(base_dir)
    print(f"Found {len(mask_files)} total mask files")

    # Step 2: Extract features from each mask
    all_features = []
    for file in mask_files:
        features = analyze_mask(file)
        if features:
            all_features.extend(features)

    # Step 3: Convert to DataFrame
    if not all_features:
        print("No features could be extracted. Check your mask files.")
        return

    df = pd.DataFrame(all_features)

    # Debug: print dataset summary
    print("\nDataset summary:")
    print(f"Total features: {len(df)}")
    print(df['component'].value_counts())
    print(df['pressure'].value_counts())

    # Validate data - check for empty component groups
    for component in df['component'].unique():
        component_data = df[df['component'] == component]
        print(f"\n{component} data summary:")
        print(f"  Total records: {len(component_data)}")
        print(f"  Unique regions: {component_data['region_id'].nunique()}")
        print(f"  Pressure distribution: {component_data['pressure'].value_counts().to_dict()}")

        # Check for key metrics
        for metric in ['area', 'perimeter', 'elongation', 'orientation_degrees']:
            if metric in component_data.columns:
                print(f"  {metric}: mean={component_data[metric].mean():.2f}, min={component_data[metric].min():.2f}, max={component_data[metric].max():.2f}")

    # Save raw data
    features_csv = os.path.join(output_dir, "morphological_features.csv")
    df.to_csv(features_csv, index=False)
    print(f"Saved features to {features_csv}")

    # Step 4: Analyze by component type (nuclei, cell, membrane)
    components = df['component'].unique()
    pressures = df['pressure'].unique()

    print(f"Analyzing components: {components}")
    print(f"Comparing pressures: {pressures}")

    # Features to analyze
    features_to_compare = [
        'area', 'perimeter', 'eccentricity', 'elongation',
        'major_axis_length', 'minor_axis_length',
        'orientation_degrees', 'solidity', 'circularity'
    ]

    # Loop through each component type
    for component in components:
        component_df = df[df['component'] == component]

        # Skip if not enough data
        if len(component_df) < 5:
            print(f"Not enough data for {component} analysis")
            continue

        component_dir = os.path.join(output_dir, component.capitalize())

        # Create violin plots for each feature (both regular and normalized)
        for feature in features_to_compare:
            if feature not in component_df.columns:
                continue

            # Regular violin plot
            create_violin_plot(
                data=component_df,
                x='pressure',
                y=feature,
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure',
                output_path=os.path.join(component_dir, f"{feature}_violinplot.png"),
                normalize=False
            )

            # Normalized violin plot
            create_violin_plot(
                data=component_df,
                x='pressure',
                y=feature,
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure (Normalized)',
                output_path=os.path.join(component_dir, f"{feature}_violinplot_normalized.png"),
                normalize=True
            )

        # Create scatter plots for feature relationships
        scatter_relationships = [
            ('area', 'perimeter'),
            ('major_axis_length', 'minor_axis_length'),
            ('eccentricity', 'circularity'),
            ('eccentricity', 'orientation_degrees'),
            ('elongation', 'area')
        ]

        for x_feature, y_feature in scatter_relationships:
            if x_feature not in component_df.columns or y_feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 8))

            # Check if we have enough data points
            has_data = False
            for pressure in pressures:
                pressure_data = component_df[component_df['pressure'] == pressure]
                if len(pressure_data) > 0:
                    has_data = True
                    # Scatter plot
                    sns.scatterplot(x=x_feature, y=y_feature, data=pressure_data,
                                  label=pressure, alpha=0.6)

                    # Regression line if we have enough points
                    if len(pressure_data) > 2:
                        sns.regplot(x=x_feature, y=y_feature, data=pressure_data,
                                  scatter=False, label=f'{pressure} trend')

            if has_data:
                plt.title(f'{component.capitalize()}: {y_feature.replace("_", " ").title()} vs {x_feature.replace("_", " ").title()}')
                plt.legend()
                plt.grid(True, linestyle='--', alpha=0.7)
                plt.tight_layout()
                plt.savefig(os.path.join(component_dir, f"{y_feature}_vs_{x_feature}_scatter.png"), dpi=300)
            else:
                print(f"Not enough data for {component} {y_feature} vs {x_feature} scatter plot")

            plt.close()

        # Create histograms for key features (both regular and normalized)
        for feature in ['area', 'elongation', 'circularity', 'orientation_degrees']:
            if feature not in component_df.columns:
                continue

            # Regular histogram
            create_histogram(
                data=component_df,
                feature=feature,
                hue='pressure',
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution',
                output_path=os.path.join(component_dir, f"{feature}_histogram.png"),
                normalize=False
            )

            # Normalized histogram
            create_histogram(
                data=component_df,
                feature=feature,
                hue='pressure',
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution (Normalized)',
                output_path=os.path.join(component_dir, f"{feature}_histogram_normalized.png"),
                normalize=True
            )

    # Create summary statistics
    summary = df.groupby(['component', 'pressure']).agg({
        'area': ['mean', 'std', 'median', 'count'],
        'perimeter': ['mean', 'std', 'median'],
        'elongation': ['mean', 'std', 'median'],
        'eccentricity': ['mean', 'std', 'median'],
        'circularity': ['mean', 'std', 'median'],
        'orientation_degrees': ['mean', 'std', 'median'],
    }).reset_index()

    summary.to_csv(os.path.join(output_dir, "morphology_summary_statistics.csv"))

    # Create separate summary visualizations for each component
    summary_features = ['area', 'elongation', 'circularity', 'orientation_degrees']

    for component in components:
        component_df = df[df['component'] == component]
        if len(component_df) < 5:
            continue

        fig, axs = plt.subplots(1, len(summary_features), figsize=(5*len(summary_features), 4))

        for j, feat in enumerate(summary_features):
            if len(summary_features) == 1:
                ax = axs
            else:
                ax = axs[j]

            # Use violin plot
            sns.violinplot(x='pressure', y=feat, data=component_df, ax=ax)
            ax.set_title(f'{feat.replace("_", " ").title()}')
            ax.set_xlabel('Pressure')

            # Add sample counts
            for i, pressure in enumerate(sorted(component_df['pressure'].unique())):
                count = len(component_df[component_df['pressure'] == pressure])
                ax.annotate(f'n={count}', xy=(i, ax.get_ylim()[0] + (ax.get_ylim()[1] - ax.get_ylim()[0])*0.05),
                           ha='center', va='bottom')

        plt.suptitle(f'{component.capitalize()} Morphology Summary', fontsize=16)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"{component}_morphology_summary.png"), dpi=300)
        plt.close()

    print("Morphological analysis complete! Results saved to:", output_dir)

# Run the analysis
analyze_morphology(base_dir, output_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting morphological analysis...
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq001_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq002_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq003_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq001_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq002_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq003_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq004_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq001_Ca

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
  plt.tight_layout()
  plt.savefig(output_path, dpi=300)
  plt.tight_layout()
  plt.savefig(output_path, dpi=300)


Morphological analysis complete! Results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h


In [7]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io, measure, segmentation
from skimage.segmentation import watershed
from skimage.measure import regionprops
from skimage.feature import peak_local_max
from scipy import ndimage
from scipy.ndimage import label, distance_transform_edt
import tifffile
from pathlib import Path
import seaborn as sns
from scipy import stats

# Set up matplotlib for better visualization
plt.rcParams['figure.figsize'] = (12, 10)
plt.rcParams['figure.dpi'] = 100
plt.style.use('ggplot')

# Define paths based on your Drive structure
base_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Segmented/flow3_1.4Pa_18h"
output_dir = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h"

# Create output directory structure
os.makedirs(output_dir, exist_ok=True)
for component in ['Nuclei', 'Cell', 'Membrane', 'Membrane_Adjusted']:
    os.makedirs(os.path.join(output_dir, component), exist_ok=True)

# Extract sample info from filenames
def extract_sample_info(filename):
    """
    Extract sample ID, pressure and component type from filename
    Example: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_cell_mask
    """
    parts = str(filename).split('_')

    # Extract pressure (0Pa or 1.4Pa)
    pressure = None
    for part in parts:
        if 'Pa' in part:
            pressure = part

    # Debug: print filename for troubleshooting
    print(f"Processing file: {filename}")

    # Check for cell mask specifically
    if '_cell_mask' in filename:
        return {
            'sample_id': '_'.join(parts[:-2]),  # everything before "cell_mask"
            'pressure': pressure,
            'component': 'cell'
        }

    # Extract component type and everything before it for sample_id
    component = None
    component_idx = None

    components = ['Nuclei', 'nuclei', 'cell', 'Cadherins', 'membrane']
    for i, part in enumerate(parts):
        if part.lower() in [c.lower() for c in components]:
            component = part
            component_idx = i
            break

    if component_idx is not None:
        sample_id = '_'.join(parts[:component_idx])

        # Map component names to standardized versions
        component_lower = component.lower()
        if component_lower == 'nuclei':
            component_type = 'nuclei'
        elif component_lower == 'cell':
            component_type = 'cell'
        elif component_lower == 'cadherins':
            component_type = 'membrane'
        elif component_lower == 'membrane':
            # Check if this is from the Membrane_Adjusted folder
            if 'adjusted' in filename:
                component_type = 'membrane_adjusted'
            else:
                component_type = 'membrane'

        return {
            'sample_id': sample_id,
            'pressure': pressure,
            'component': component_type
        }

    # If we got here, we couldn't identify the component
    print(f"WARNING: Could not extract component from {filename}")
    return None

# Find all mask files, prioritizing Membrane_Adjusted over Membrane
def find_mask_files(base_dir):
    """Find mask files by component type"""
    # Initialize dictionaries to store mask files by component
    mask_files_by_component = {
        'nuclei': [],
        'cell': [],
        'membrane': [],
        'membrane_adjusted': []
    }

    for root, dirs, files in os.walk(base_dir):
        folder_name = os.path.basename(root)

        for file in files:
            if file.endswith('_mask.tif') or ('_membrane_mask_adjusted' in file):
                file_path = os.path.join(root, file)

                # Try to determine component from filename
                sample_info = extract_sample_info(file)
                if sample_info and sample_info['component'] in mask_files_by_component:
                    mask_files_by_component[sample_info['component']].append(file_path)

    # Print summary
    for component, files in mask_files_by_component.items():
        print(f"Found {len(files)} {component} mask files")

    # Combine all files, but use membrane_adjusted instead of membrane if available
    all_mask_files = []
    all_mask_files.extend(mask_files_by_component['nuclei'])
    all_mask_files.extend(mask_files_by_component['cell'])

    # For membrane, prefer membrane_adjusted if available
    if mask_files_by_component['membrane_adjusted']:
        print("Using membrane_adjusted files (preferred)")
        all_mask_files.extend(mask_files_by_component['membrane_adjusted'])
    else:
        print("Using membrane files (fallback)")
        all_mask_files.extend(mask_files_by_component['membrane'])

    return all_mask_files, mask_files_by_component

# Process cell mask specially to handle multiple cells
def process_cell_mask(mask):
    """
    Process cell mask to identify individual cells
    Using distance transform watershed segmentation
    """
    # Make sure mask is binary
    binary_mask = mask > 0

    # Apply distance transform
    distance = distance_transform_edt(binary_mask)

    # Find local maxima (cell centers)
    # Adjust min_distance based on your data
    min_distance = 20
    local_max = peak_local_max(distance,
                              min_distance=min_distance,
                              labels=binary_mask,
                              indices=False)

    # Create markers for watershed
    markers = label(local_max)[0]

    # Apply watershed to find cell boundaries
    labeled_cells = watershed(-distance, markers, mask=binary_mask)

    # Debug: save a visualization of the segmentation
    plt.figure(figsize=(10, 10))
    plt.imshow(labeled_cells, cmap='nipy_spectral')
    plt.title(f"Cell Segmentation (found {np.max(labeled_cells)} cells)")
    plt.colorbar()
    plt.savefig(os.path.join(output_dir, "cell_segmentation_debug.png"))
    plt.close()

    return labeled_cells

# Extract morphological features from mask
def analyze_mask(mask_path):
    """Extract morphometric features from a binary mask"""
    try:
        # Read mask
        mask = tifffile.imread(mask_path)

        # Get sample info from filename
        filename = os.path.basename(mask_path)
        sample_info = extract_sample_info(filename)

        if not sample_info:
            print(f"Could not extract sample info from {filename}")
            return None

        # Use different processing for cell masks
        if sample_info['component'] == 'cell':
            # Special processing for cell masks
            labeled_mask = process_cell_mask(mask)
            num_features = np.max(labeled_mask)
            print(f"Cell mask processed - identified {num_features} individual cells")
        else:
            # For other components, continue with standard processing
            # Ensure binary mask
            if mask.dtype != bool:
                mask = mask > 0

            # Label connected components
            labeled_mask, num_features = ndimage.label(mask)

        if num_features == 0:
            print(f"No features found in {mask_path}")
            return None

        # Calculate region properties
        regions = regionprops(labeled_mask)

        features_list = []

        # Extract features for each region in the mask
        for i, region in enumerate(regions):
            # Skip very small regions (likely noise)
            if region.area < 10:
                continue

            # Calculate additional features
            # Circularity = 4π * area / perimeter²
            circularity = 4 * np.pi * region.area / (region.perimeter ** 2) if region.perimeter > 0 else 0

            # Elongation = major_axis_length / minor_axis_length
            elongation = region.major_axis_length / region.minor_axis_length if region.minor_axis_length > 0 else 0

            # Normalize orientation to 0-90 degrees (relevant to flow direction)
            orientation_deg = np.degrees(region.orientation)
            # Normalize to 0-90 degrees range (absolute value)
            norm_orientation = abs(orientation_deg)

            features = {
                'sample_id': sample_info['sample_id'],
                'pressure': sample_info['pressure'],
                'component': sample_info['component'],
                'region_id': i,
                # Size and shape features
                'area': region.area,
                'perimeter': region.perimeter,
                'equivalent_diameter': region.equivalent_diameter,
                # Elongation features
                'major_axis_length': region.major_axis_length,
                'minor_axis_length': region.minor_axis_length,
                'elongation': elongation,
                'eccentricity': region.eccentricity,
                # Orientation features
                'orientation_degrees': norm_orientation,  # Normalized to 0-90
                # Other shape descriptors
                'solidity': region.solidity,
                'circularity': circularity
            }

            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error analyzing mask {mask_path}: {e}")
        return None

# Save segmentation visualization for debugging
def save_segmentation_debug(mask_path, output_dir):
    """Create a debug visualization of the segmentation"""
    try:
        mask = tifffile.imread(mask_path)
        filename = os.path.basename(mask_path)

        # Process based on component type
        sample_info = extract_sample_info(filename)
        if sample_info and sample_info['component'] == 'cell':
            # Process cell mask
            labeled_mask = process_cell_mask(mask)
        else:
            # Standard processing
            if mask.dtype != bool:
                mask = mask > 0
            labeled_mask, _ = ndimage.label(mask)

        # Create debug image directory
        debug_dir = os.path.join(output_dir, "Debug_Images")
        os.makedirs(debug_dir, exist_ok=True)

        # Create visualization
        plt.figure(figsize=(12, 12))
        plt.imshow(labeled_mask, cmap='nipy_spectral')
        plt.title(f"Segmentation: {filename}\nRegions: {np.max(labeled_mask)}")
        plt.colorbar(label="Region ID")
        plt.axis('off')
        plt.tight_layout()

        # Save to debug directory
        output_file = os.path.join(debug_dir, f"{filename.replace('.tif', '')}_segmentation.png")
        plt.savefig(output_file, dpi=150)
        plt.close()

        print(f"Debug image saved to {output_file}")

    except Exception as e:
        print(f"Error creating debug image for {mask_path}: {e}")

# Create normalized violin plot
def create_violin_plot(data, x, y, hue=None, title=None, output_path=None, normalize=False):
    """Create violin plot, with option for normalized values"""
    plt.figure(figsize=(10, 6))

    if normalize:
        # Normalize the data for plotting percentages
        y_data = data[y].copy()
        groups = data[x].unique()
        normalized_data = data.copy()

        for group in groups:
            group_data = y_data[data[x] == group]
            group_min = group_data.min()
            group_max = group_data.max()
            # Normalize to 0-100% scale
            normalized_data.loc[data[x] == group, y] = ((y_data[data[x] == group] - group_min) /
                                                       (group_max - group_min)) * 100 if (group_max - group_min) > 0 else 0

        # Use normalized data for plotting
        plot_data = normalized_data
        y_label = f"{y.replace('_', ' ').title()} (%)"
    else:
        # Use original data
        plot_data = data
        y_label = y.replace('_', ' ').title()

    # Create violin plot
    ax = sns.violinplot(x=x, y=y, data=plot_data, hue=hue)

    # Add statistical comparison if applicable
    pressures = plot_data[x].unique()
    if len(pressures) == 2:
        # Perform t-test between groups
        group1 = plot_data[plot_data[x] == pressures[0]][y].dropna()
        group2 = plot_data[plot_data[x] == pressures[1]][y].dropna()

        if len(group1) > 0 and len(group2) > 0:
            try:
                stat, p_value = stats.ttest_ind(group1, group2, equal_var=False)

                # Add significance annotation
                sig_text = f'p = {p_value:.3f}'
                if p_value < 0.001:
                    sig_text += ' ***'
                elif p_value < 0.01:
                    sig_text += ' **'
                elif p_value < 0.05:
                    sig_text += ' *'

                plt.annotate(sig_text, xy=(0.5, 0.95), xycoords='axes fraction',
                            ha='center', va='center',
                            bbox=dict(boxstyle='round', fc='white', alpha=0.7))
            except Exception as e:
                plt.annotate(f"Stats error: {str(e)}", xy=(0.5, 0.95), xycoords='axes fraction',
                            ha='center', va='center',
                            bbox=dict(boxstyle='round', fc='white', alpha=0.7))

    plt.title(title if title else f'{y.replace("_", " ").title()} by {x.title()}')
    plt.ylabel(y_label)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300)

    plt.close()

# Create histogram with normalized option
def create_histogram(data, feature, hue, title=None, output_path=None, normalize=False):
    """Create histogram with normalized option"""
    plt.figure(figsize=(12, 6))

    groups = data[hue].unique()

    if normalize:
        # Create percentage-based histogram
        for group in groups:
            group_data = data[data[hue] == group][feature].dropna()
            if len(group_data) > 0:
                # Use density=True for percentage
                sns.histplot(group_data, kde=True, label=group, alpha=0.6, stat="density")
        plt.ylabel("Density")
    else:
        # Standard count histogram
        for group in groups:
            group_data = data[data[hue] == group][feature].dropna()
            if len(group_data) > 0:
                sns.histplot(group_data, kde=True, label=group, alpha=0.6)
        plt.ylabel("Count")

    plt.title(title if title else f'{feature.replace("_", " ").title()} Distribution')
    plt.xlabel(feature.replace("_", " ").title())
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300)

    plt.close()

# Main analysis function
def analyze_morphology(base_dir, output_dir):
    """Analyze morphological features of all masks and compare between pressures"""
    print("Starting morphological analysis...")

    # Step 1: Find all mask files by component
    mask_files, mask_files_by_component = find_mask_files(base_dir)
    print(f"Found {len(mask_files)} total mask files")

    # Step 2: First create debug visualizations of segmentation
    print("Creating segmentation debug visualizations...")
    for component, files in mask_files_by_component.items():
        if component == 'cell':  # Focus on cell masks for debugging
            for file in files:
                save_segmentation_debug(file, output_dir)

    # Step 3: Extract features from each mask
    all_features = []
    for file in mask_files:
        features = analyze_mask(file)
        if features:
            all_features.extend(features)

    # Step 4: Convert to DataFrame
    if not all_features:
        print("No features could be extracted. Check your mask files.")
        return

    df = pd.DataFrame(all_features)

    # Debug: print dataset summary
    print("\nDataset summary:")
    print(f"Total features: {len(df)}")
    print(df['component'].value_counts())
    print(df['pressure'].value_counts())

    # Validate data - check for empty component groups
    for component in df['component'].unique():
        component_data = df[df['component'] == component]
        print(f"\n{component} data summary:")
        print(f"  Total records: {len(component_data)}")
        print(f"  Unique regions: {component_data['region_id'].nunique()}")
        print(f"  Pressure distribution: {component_data['pressure'].value_counts().to_dict()}")

        # Check for key metrics
        for metric in ['area', 'perimeter', 'elongation', 'orientation_degrees']:
            if metric in component_data.columns:
                print(f"  {metric}: mean={component_data[metric].mean():.2f}, min={component_data[metric].min():.2f}, max={component_data[metric].max():.2f}")

    # Save raw data
    features_csv = os.path.join(output_dir, "morphological_features.csv")
    df.to_csv(features_csv, index=False)
    print(f"Saved features to {features_csv}")

    # Step 5: Analyze by component type (nuclei, cell, membrane)
    components = df['component'].unique()
    pressures = df['pressure'].unique()

    print(f"Analyzing components: {components}")
    print(f"Comparing pressures: {pressures}")

    # Features to analyze
    features_to_compare = [
        'area', 'perimeter', 'eccentricity', 'elongation',
        'major_axis_length', 'minor_axis_length',
        'orientation_degrees', 'solidity', 'circularity'
    ]

    # Create a separate plot comparing orientation between nuclei and cells
    if 'nuclei' in df['component'].unique() and 'cell' in df['component'].unique():
        for pressure in pressures:
            pressure_df = df[df['pressure'] == pressure]
            nuclei_orientation = pressure_df[pressure_df['component'] == 'nuclei']['orientation_degrees']
            cell_orientation = pressure_df[pressure_df['component'] == 'cell']['orientation_degrees']

            if len(nuclei_orientation) > 0 and len(cell_orientation) > 0:
                plt.figure(figsize=(10, 6))
                plt.hist(nuclei_orientation, bins=18, alpha=0.5, label='Nuclei', density=True)
                plt.hist(cell_orientation, bins=18, alpha=0.5, label='Cell', density=True)
                plt.title(f'Orientation Comparison - {pressure}')
                plt.xlabel('Orientation (degrees)')
                plt.ylabel('Density')
                plt.legend()
                plt.grid(True, linestyle='--', alpha=0.7)
                plt.tight_layout()
                plt.savefig(os.path.join(output_dir, f"orientation_comparison_{pressure}.png"), dpi=300)
                plt.close()

    # Loop through each component type
    for component in components:
        component_df = df[df['component'] == component]

        # Skip if not enough data
        if len(component_df) < 5:
            print(f"Not enough data for {component} analysis")
            continue

        component_dir = os.path.join(output_dir, component.capitalize())

        # Create violin plots for each feature (both regular and normalized)
        for feature in features_to_compare:
            if feature not in component_df.columns:
                continue

            # Regular violin plot
            create_violin_plot(
                data=component_df,
                x='pressure',
                y=feature,
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure',
                output_path=os.path.join(component_dir, f"{feature}_violinplot.png"),
                normalize=False
            )

            # Normalized violin plot
            create_violin_plot(
                data=component_df,
                x='pressure',
                y=feature,
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} by Pressure (Normalized)',
                output_path=os.path.join(component_dir, f"{feature}_violinplot_normalized.png"),
                normalize=True
            )

        # Create scatter plots for feature relationships
        scatter_relationships = [
            ('area', 'perimeter'),
            ('major_axis_length', 'minor_axis_length'),
            ('eccentricity', 'circularity'),
            ('eccentricity', 'orientation_degrees'),
            ('elongation', 'area')
        ]

        for x_feature, y_feature in scatter_relationships:
            if x_feature not in component_df.columns or y_feature not in component_df.columns:
                continue

            plt.figure(figsize=(10, 8))

            # Check if we have enough data points
            has_data = False
            for pressure in pressures:
                pressure_data = component_df[component_df['pressure'] == pressure]
                if len(pressure_data) > 0:
                    has_data = True
                    # Scatter plot
                    sns.scatterplot(x=x_feature, y=y_feature, data=pressure_data,
                                  label=pressure, alpha=0.6)

                    # Regression line if we have enough points
                    if len(pressure_data) > 2:
                        sns.regplot(x=x_feature, y=y_feature, data=pressure_data,
                                  scatter=False, label=f'{pressure} trend')

            if has_data:
                plt.title(f'{component.capitalize()}: {y_feature.replace("_", " ").title()} vs {x_feature.replace("_", " ").title()}')
                plt.legend()
                plt.grid(True, linestyle='--', alpha=0.7)
                plt.tight_layout()
                plt.savefig(os.path.join(component_dir, f"{y_feature}_vs_{x_feature}_scatter.png"), dpi=300)
            else:
                print(f"Not enough data for {component} {y_feature} vs {x_feature} scatter plot")

            plt.close()

        # Create histograms for key features (both regular and normalized)
        for feature in ['area', 'elongation', 'circularity', 'orientation_degrees']:
            if feature not in component_df.columns:
                continue

            # Regular histogram
            create_histogram(
                data=component_df,
                feature=feature,
                hue='pressure',
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution',
                output_path=os.path.join(component_dir, f"{feature}_histogram.png"),
                normalize=False
            )

            # Normalized histogram
            create_histogram(
                data=component_df,
                feature=feature,
                hue='pressure',
                title=f'{component.capitalize()} {feature.replace("_", " ").title()} Distribution (Normalized)',
                output_path=os.path.join(component_dir, f"{feature}_histogram_normalized.png"),
                normalize=True
            )

    # Create summary statistics
    summary = df.groupby(['component', 'pressure']).agg({
        'area': ['mean', 'std', 'median', 'count'],
        'perimeter': ['mean', 'std', 'median'],
        'elongation': ['mean', 'std', 'median'],
        'eccentricity': ['mean', 'std', 'median'],
        'circularity': ['mean', 'std', 'median'],
        'orientation_degrees': ['mean', 'std', 'median'],
    }).reset_index()

    summary.to_csv(os.path.join(output_dir, "morphology_summary_statistics.csv"))

    # Create separate summary visualizations for each component
    summary_features = ['area', 'elongation', 'circularity', 'orientation_degrees']

    for component in components:
        component_df = df[df['component'] == component]
        if len(component_df) < 5:
            continue

        fig, axs = plt.subplots(1, len(summary_features), figsize=(5*len(summary_features), 4))

        for j, feat in enumerate(summary_features):
            if len(summary_features) == 1:
                ax = axs
            else:
                ax = axs[j]

            # Use violin plot
            sns.violinplot(x='pressure', y=feat, data=component_df, ax=ax)
            ax.set_title(f'{feat.replace("_", " ").title()}')
            ax.set_xlabel('Pressure')

            # Add sample counts
            for i, pressure in enumerate(sorted(component_df['pressure'].unique())):
                count = len(component_df[component_df['pressure'] == pressure])
                ax.annotate(f'n={count}', xy=(i, ax.get_ylim()[0] + (ax.get_ylim()[1] - ax.get_ylim()[0])*0.05),
                           ha='center', va='bottom')

        plt.suptitle(f'{component.capitalize()} Morphology Summary', fontsize=16)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"{component}_morphology_summary.png"), dpi=300)
        plt.close()

    print("Morphological analysis complete! Results saved to:", output_dir)

# Run the analysis
analyze_morphology(base_dir, output_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Starting morphological analysis...
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq001_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq002_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq003_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq001_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq002_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq003_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq004_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_1.4Pa_U_05mar19_20x_L2R_Flat_seq005_Nuclei_contrast_bg_tophat_mask.tif
Processing file: denoised_0Pa_U_05mar19_20x_L2RA_Flat_seq001_Ca

  plt.tight_layout()
  plt.savefig(output_path, dpi=300)
  plt.tight_layout()
  plt.savefig(output_path, dpi=300)


Morphological analysis complete! Results saved to: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3_1.4Pa_18h
