In [1]:
import pandas as pd
import os
from google.colab import drive

# Mount Google Drive - this prompts for authorization in Colab.
try:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except Exception as e:
    print(f"Error mounting Google Drive: {e}")
    print("Please ensure you are running this in a Google Colab environment and authorize access.")
    # Everything below reads from the mounted drive, so stop here.
    # `raise SystemExit` halts the running cell; the interactive `exit()` helper
    # instead asks an IPython/Colab kernel to shut down, which throws away all
    # notebook state. In plain Python the two are equivalent.
    raise SystemExit

# Define the base path and folder names
# IMPORTANT: Verify this base_path is correct after mounting your drive!
base_path = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/"
folders = ["Static-x40", "Static-x20", "flow3-x20", "1.4Pa-x40", "1.4Pa-x20"]
file_name = "Senescence_Results/cell_classification_rule_based_full.csv"

# One DataFrame per successfully loaded folder; they are concatenated later.
all_data = []

print("\nStarting data loading process...")
# Load each folder's CSV and tag its rows with the pressure and magnification
# encoded in the folder name.
for folder in folders:
    folder_path = os.path.join(base_path, folder)
    file_path = os.path.join(folder_path, file_name)
    print(f"Attempting to load data from: {file_path}")

    # Report a missing folder separately from a missing file so the warning
    # points at the right level of the directory tree.
    if not os.path.exists(folder_path):
        print(f"Warning: Folder '{folder}' not found at '{folder_path}'. Skipping.")
        continue
    if not os.path.exists(file_path):
        print(f"Warning: File '{file_name}' not found at '{file_path}'. Skipping.")
        continue

    try:
        df = pd.read_csv(file_path)

        # Extract pressure: "Static" and "flow3" folders are labelled 0Pa.
        if "Static" in folder or "flow3" in folder:
            df['pressure'] = '0Pa'
        elif "1.4Pa" in folder:
            df['pressure'] = '1.4Pa'
        else:
            # Folder name gives no hint: fall back to the prefix of each cell id.
            df['pressure'] = df['cell_id_unique'].apply(
                lambda x: '1.4Pa' if isinstance(x, str) and x.startswith('1.4Pa')
                else ('0Pa' if isinstance(x, str) else 'Unknown')
            )

        # Extract magnification source from the folder name.
        if "x40" in folder:
            magnification_label = 'x40'
        elif "x20" in folder:
            magnification_label = 'x20'
        else:
            # Fallback if folder name doesn't specify, though the listed names do
            magnification_label = 'unknown'
        df['magnification_source'] = magnification_label

        # Build the summary from the values actually present. Indexing
        # `.unique()[0]` raised IndexError for a header-only CSV, which made a
        # file that had already been appended look like a failed load.
        pressure_summary = ", ".join(map(str, df['pressure'].unique())) or "n/a"

        all_data.append(df)
        print(f"Successfully loaded data from: {file_path}")
        print(f"  Folder: {folder} -> Pressure: {pressure_summary}, Magnification: {magnification_label}")

    except Exception as e:
        # Best-effort loading: one unreadable file must not abort the others.
        print(f"Error processing file {file_path}: {e}")

# Concatenate all dataframes, rescale x40 measurements, then summarise the
# shape metrics per pressure and cell type.
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    print("\nCombined DataFrame Info (before adjustments):")
    combined_df.info(verbose=True, show_counts=True)

    # Validate the grouping columns before anything reads them, so a missing
    # column produces the intended message instead of a KeyError further down.
    for required_col in ('cell_type', 'pressure'):
        if required_col not in combined_df.columns:
            print(f"Error: '{required_col}' column not found in the combined data. Cannot proceed with analysis.")
            # Stops the cell without asking the notebook kernel to shut down
            # (which is what the interactive `exit()` helper does).
            raise SystemExit

    # --- Apply magnification adjustments ---
    print("\nApplying magnification adjustments to cell_area and cell_perimeter...")

    # (column, label used in messages, divisor applied to x40 rows).
    # NOTE(review): the divisors presumably map x40 measurements onto the x20
    # scale (2x linear, 4x area) - confirm the units of the source CSVs.
    x40_adjustments = [
        ('cell_area', 'area', 4.0),
        ('cell_perimeter', 'perimeter', 2.0),
    ]

    # Ensure columns exist and are numeric before adjustment. Casting to float
    # up front keeps the later division from writing floats into an int column.
    adjustable = []
    for col, label, divisor in x40_adjustments:
        if col in combined_df.columns:
            combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce').astype(float)
            adjustable.append((col, label, divisor))
        else:
            print(f"Warning: '{col}' column not found. Cannot apply {label} adjustment.")

    # Identify rows from x40 magnification
    is_x40 = combined_df['magnification_source'] == 'x40'
    num_x40_rows = is_x40.sum()
    print(f"Found {num_x40_rows} rows from x40 magnification datasets for adjustment.")

    if num_x40_rows > 0:
        for col, label, divisor in adjustable:
            original_sum_x40 = combined_df.loc[is_x40, col].sum()
            combined_df.loc[is_x40, col] = combined_df.loc[is_x40, col] / divisor
            adjusted_sum_x40 = combined_df.loc[is_x40, col].sum()
            print(f"  Cell {label} for x40 data adjusted (divided by {divisor:g}). Original sum: {original_sum_x40}, Adjusted sum: {adjusted_sum_x40}")
    # ---------------------------------------

    print("\nCombined DataFrame Head (first 5 rows after adjustments):")
    # Only preview columns that exist; the warnings above already covered any
    # missing measurement column.
    preview_columns = [
        col for col in ['cell_id_unique', 'pressure', 'magnification_source', 'cell_area', 'cell_perimeter']
        if col in combined_df.columns
    ]
    print(combined_df[preview_columns].head())

    print("\nUnique values in 'pressure' column after combining:")
    print(combined_df['pressure'].unique())
    print("\nValue counts for 'pressure':")
    print(combined_df['pressure'].value_counts())
    print("\nUnique values in 'cell_type' column after combining:")
    print(combined_df['cell_type'].unique())
    print("\nValue counts for 'cell_type':")
    print(combined_df['cell_type'].value_counts(dropna=False))

    # Define the columns for analysis
    analysis_columns = [
        'angular_misalignment_to_flow_deg',
        'cell_area',  # Now adjusted
        'cell_perimeter',  # Now adjusted
        'cell_eccentricity',
        'cell_circularity',
        'cell_aspect_ratio'
    ]

    # Keep only the classified cells and the two known pressure conditions.
    combined_df_filtered = combined_df[
        combined_df['cell_type'].isin(['Senescent', 'Non-senescent']) &
        combined_df['pressure'].isin(['0Pa', '1.4Pa'])
    ].copy()

    print(f"\nShape of combined_df: {combined_df.shape}")
    print(f"Shape of combined_df_filtered: {combined_df_filtered.shape}")

    if combined_df_filtered.empty:
        print("DataFrame is empty after filtering for cell_type and pressure. Check your data.")
    else:
        print("\nValue counts for 'pressure' in filtered data:")
        print(combined_df_filtered['pressure'].value_counts())
        print("\nValue counts for 'cell_type' in filtered data:")
        print(combined_df_filtered['cell_type'].value_counts())

        print("\nConverting analysis columns to numeric type (if not already)...")
        # Report the missing columns first, then rebuild the list. Removing
        # items from `analysis_columns` while looping over it skipped the
        # entry that followed each missing column.
        for col in analysis_columns:
            if col not in combined_df_filtered.columns:
                print(f"  Warning: Analysis column '{col}' not found in the filtered DataFrame. It will be skipped.")
        analysis_columns = [col for col in analysis_columns if col in combined_df_filtered.columns]

        for col in analysis_columns:
            # Harmless for columns that are already numeric.
            combined_df_filtered[col] = pd.to_numeric(combined_df_filtered[col], errors='coerce')

        valid_analysis_columns = [
            col for col in analysis_columns
            if pd.api.types.is_numeric_dtype(combined_df_filtered[col])
        ]

        if not valid_analysis_columns:
            print("\nNo valid numeric columns found for analysis after filtering and type conversion. Cannot generate statistics.")
        else:
            print(f"\nPerforming descriptive analysis on (adjusted) columns: {valid_analysis_columns}")
            try:
                descriptive_stats = combined_df_filtered.groupby(['pressure', 'cell_type'])[valid_analysis_columns].agg(['mean', 'median', 'std', 'min', 'max', 'count'])
                print("\n--- Descriptive Statistics by Pressure and Cell Type (with x40 adjustments) ---")
                print(descriptive_stats)

                output_stats_path = os.path.join(base_path, "descriptive_stats_by_pressure_cell_type_adjusted.csv")
                descriptive_stats.to_csv(output_stats_path)
                print(f"\nDescriptive statistics (with adjustments) saved to: {output_stats_path}")

            except Exception as e:
                # A failed summary must not prevent the combined data from being saved below.
                print(f"Error during groupby or aggregation: {e}")

    # The combined (unfiltered) table is written regardless of the filter outcome.
    output_combined_file_path = os.path.join(base_path, "combined_cell_data_adjusted.csv")
    combined_df.to_csv(output_combined_file_path, index=False)
    print(f"\nCombined data (with adjustments and new columns) saved to: {output_combined_file_path}")

else:
    print("\nNo data was successfully loaded. Please check file paths and ensure Google Drive is mounted correctly.")
    print(f"  Expected base path for your files: {base_path}")
    print(f"  Expected subfolders: {folders}")
    print(f"  Expected file name within subfolders: {file_name}")

Mounted at /content/drive
Google Drive mounted successfully.

Starting data loading process...
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x40/Senescence_Results/cell_classification_rule_based_full.csv
Successfully loaded data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x40/Senescence_Results/cell_classification_rule_based_full.csv
  Folder: Static-x40 -> Pressure: 0Pa, Magnification: x40
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/cell_classification_rule_based_full.csv
Successfully loaded data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/cell_classification_rule_based_full.csv
  Folder: Static-x20 -> Pressure: 0Pa, Magnification: x20
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/flow3-x20/Senescence_R

In [3]:
import pandas as pd
import numpy as np
import os
from google.colab import drive

# Mount Google Drive (a no-op message is printed if it is already mounted).
try:
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except Exception as e:
    print(f"Error mounting Google Drive: {e}")
    # Nothing below can run without the drive. `raise SystemExit` stops the
    # cell; the interactive `exit()` helper would instead ask an IPython/Colab
    # kernel to shut down and discard all state.
    raise SystemExit

# Define the base path and folder names
base_path = "/content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/"
folders = ["Static-x40", "Static-x20", "flow3-x20", "1.4Pa-x40", "1.4Pa-x20"]
file_name = "Senescence_Results/cell_classification_rule_based_full.csv"

# One DataFrame per successfully loaded folder; they are concatenated later.
all_data = []
print("\nStarting data loading process...")
for folder in folders:
    file_path = os.path.join(base_path, folder, file_name)
    print(f"Attempting to load data from: {file_path}")
    if not os.path.exists(file_path):
        print(f"Warning: File not found at '{file_path}'. Skipping.")
        continue

    # Both labels depend only on the folder name, so derive them once as
    # scalars and reuse them for the columns and the log line.
    # --- Pressure Extraction ---
    if "Static" in folder or "flow3" in folder:  # Assuming flow3 is also 0Pa
        pressure_label = '0Pa'
    elif "1.4Pa" in folder:
        pressure_label = '1.4Pa'
    else:
        # Fallback if folder name is ambiguous (should not happen with the listed folders)
        pressure_label = 'Unknown'

    # --- Magnification Extraction ---
    if "x40" in folder:
        magnification_label = 'x40'
    elif "x20" in folder:
        magnification_label = 'x20'
    else:
        magnification_label = 'unknown'

    try:
        df = pd.read_csv(file_path)
        df['pressure_condition'] = pressure_label
        df['magnification_source'] = magnification_label

        all_data.append(df)
        # Log the scalar labels rather than `df[...].iloc[0]`, which raised
        # IndexError for a header-only CSV after the frame was already appended.
        print(f"Successfully loaded data from: {file_path} (Pressure: {pressure_label}, Mag: {magnification_label})")
    except Exception as e:
        # Best-effort loading: one unreadable file must not abort the others.
        print(f"Error processing file {file_path}: {e}")

# Stop early when nothing could be loaded; every later step needs rows.
if not all_data:
    print("\nNo data was successfully loaded. Exiting.")
    # Halts the cell without asking the notebook kernel to shut down, which is
    # what the interactive `exit()` helper does under IPython/Colab.
    raise SystemExit

# Stack the per-folder frames into one table with a fresh 0..n-1 index.
combined_df = pd.concat(all_data, ignore_index=True)
print(f"\nCombined data from {len(all_data)} files. Total rows: {len(combined_df)}")

# --- Preprocessing ---
if 'cell_id_unique' not in combined_df.columns:
    print("Error: 'cell_id_unique' column not found. Cannot create image_id. Exiting.")
    # Halts the cell; `exit()` would ask an IPython/Colab kernel to shut down.
    raise SystemExit

# image_id is the cell id with its last "_<suffix>" removed; ids without an
# underscore are kept whole.
# NOTE(review): image_id is not qualified by folder - if two folders contain
# identically named images their cells would be pooled. Confirm ids are unique.
combined_df['image_id'] = combined_df['cell_id_unique'].astype(str).str.rsplit('_', n=1).str[0]
print(f"\nCreated 'image_id' column. Example: {combined_df['image_id'].iloc[0] if len(combined_df) > 0 else 'N/A'}")

# Guarantee the measurement columns exist as floats: missing columns become
# all-NaN, unparseable values become NaN. Casting to float keeps the in-place
# divisions below from writing floats into an integer column.
for col_name in ['cell_area', 'cell_perimeter', 'angular_misalignment_to_flow_deg']:
    if col_name not in combined_df.columns:
        print(f"Warning: Column '{col_name}' not found. It will be created as NaN.")
        combined_df[col_name] = np.nan
    else:
        combined_df[col_name] = pd.to_numeric(combined_df[col_name], errors='coerce').astype(float)

# Rescale x40 rows: area by 1/4, perimeter by 1/2.
# NOTE(review): presumably this maps x40 measurements onto the x20 scale
# (2x linear, 4x area) - confirm the units of the source CSVs.
if 'magnification_source' in combined_df.columns:
    is_x40 = combined_df['magnification_source'] == 'x40'
    combined_df.loc[is_x40, 'cell_area'] /= 4.0
    print(f"Adjusted 'cell_area' for {is_x40.sum()} rows from x40 magnification.")
    combined_df.loc[is_x40, 'cell_perimeter'] /= 2.0
    print(f"Adjusted 'cell_perimeter' for {is_x40.sum()} rows from x40 magnification.")

if 'cell_type' not in combined_df.columns:
    print("Error: 'cell_type' column not found. Exiting.")
    raise SystemExit

# Keep only cells with a definite classification; copy so later column
# assignments do not touch combined_df.
combined_df_valid_cells = combined_df[combined_df['cell_type'].isin(['Senescent', 'Non-senescent'])].copy()
print(f"Filtered for Senescent/Non-senescent cell types. Rows remaining: {len(combined_df_valid_cells)}")


# --- Identify Mechanoadaptive Images (and associate pressure) ---
# An image counts as "mechanoadaptive" when senescent cells cover less than
# `mechanoadaptive_threshold` percent of the total classified cell area.
print("\nIdentifying mechanoadaptive images and their pressure conditions...")

# Per-image area covered by senescent cells (images without any are absent here).
senescent_area_per_image = combined_df_valid_cells[
    combined_df_valid_cells['cell_type'] == 'Senescent'
].groupby('image_id')['cell_area'].sum().rename('senescent_total_area')

# Per-image area covered by all classified cells.
all_cell_area_per_image = combined_df_valid_cells.groupby('image_id')['cell_area'].sum().rename('all_cells_total_area')

# Get the pressure condition for each image_id (should be consistent per image)
pressure_per_image = combined_df_valid_cells.groupby('image_id')['pressure_condition'].first().rename('pressure_condition')

# Combine into one row per image. All three series are indexed by image_id,
# so a plain index-on-index left join lines them up.
image_summary_df = (
    all_cell_area_per_image.to_frame()
    .join(senescent_area_per_image)
    .join(pressure_per_image)
)
# Images with no senescent cells have no entry in the senescent series: treat as 0 area.
image_summary_df['senescent_total_area'] = image_summary_df['senescent_total_area'].fillna(0)

# Percentage is undefined (NaN) when an image has no measurable cell area.
image_summary_df['percentage_senescent_area'] = np.where(
    image_summary_df['all_cells_total_area'] > 0,
    (image_summary_df['senescent_total_area'] / image_summary_df['all_cells_total_area']) * 100,
    np.nan
)
# Copy so the flag column below is added to an independent frame.
image_summary_df = image_summary_df.dropna(subset=['percentage_senescent_area', 'pressure_condition']).copy()

mechanoadaptive_threshold = 35  # percent of cell area that is senescent
image_summary_df['is_mechanoadaptive'] = image_summary_df['percentage_senescent_area'] < mechanoadaptive_threshold

mechanoadaptive_image_ids = image_summary_df[image_summary_df['is_mechanoadaptive']].index.tolist()

print(f"Found {len(image_summary_df)} images with valid cell area and pressure data.")
print(f"Identified {len(mechanoadaptive_image_ids)} mechanoadaptive images (Senescent area < {mechanoadaptive_threshold}%).")

if not mechanoadaptive_image_ids:
    print("No mechanoadaptive images found. Exiting analysis for orientation.")
    # Halts the cell; `exit()` would ask an IPython/Colab kernel to shut down.
    raise SystemExit

print("Example mechanoadaptive image IDs:", mechanoadaptive_image_ids[:3])
print("Pressure conditions in mechanoadaptive images summary:")
print(image_summary_df[image_summary_df['is_mechanoadaptive']]['pressure_condition'].value_counts())


# The index (image_id) is written as the first CSV column.
image_summary_output_path = os.path.join(base_path, "image_mechanoadaptation_summary_with_pressure.csv")
image_summary_df.to_csv(image_summary_output_path)
print(f"Image mechanoadaptation summary saved to: {image_summary_output_path}")


# --- Calculate Orientation Statistics for Mechanoadaptive Images, grouped by Pressure ---
print("\nCalculating orientation statistics for mechanoadaptive images, grouped by pressure...")

# All classified cells that belong to an image flagged as mechanoadaptive.
cells_in_mechanoadaptive_images_df = combined_df_valid_cells[
    combined_df_valid_cells['image_id'].isin(mechanoadaptive_image_ids)
].copy()

if cells_in_mechanoadaptive_images_df.empty:
    print("No cells found belonging to mechanoadaptive images.")
    # Halts the cell; `exit()` would ask an IPython/Colab kernel to shut down.
    raise SystemExit

orientation_col = 'angular_misalignment_to_flow_deg'

if orientation_col not in cells_in_mechanoadaptive_images_df.columns or \
   cells_in_mechanoadaptive_images_df[orientation_col].isnull().all():
    print("Warning: 'angular_misalignment_to_flow_deg' column is missing or all NaN in mechanoadaptive images. Cannot calculate orientation statistics.")
else:
    sen_mech_cells = cells_in_mechanoadaptive_images_df[cells_in_mechanoadaptive_images_df['cell_type'] == 'Senescent']
    nonsen_mech_cells = cells_in_mechanoadaptive_images_df[cells_in_mechanoadaptive_images_df['cell_type'] == 'Non-senescent']

    # (label written to the 'Cell Group' column, cells to summarise,
    #  message printed when that subset has no usable orientation values)
    orientation_groups = [
        ('Senescent', sen_mech_cells,
         "No Senescent cells with orientation data found in mechanoadaptive images, or data insufficient for grouping by pressure."),
        ('Non-senescent', nonsen_mech_cells,
         "No Non-senescent cells with orientation data found in mechanoadaptive images, or data insufficient for grouping by pressure."),
        ('All Cells', cells_in_mechanoadaptive_images_df,
         "No cells with orientation data found overall in mechanoadaptive images for grouping by pressure."),
    ]

    # The same per-pressure aggregation is applied to every group.
    all_orientation_stats = []
    for group_label, group_cells, no_data_message in orientation_groups:
        if group_cells.empty or group_cells[orientation_col].isnull().all():
            print(no_data_message)
            continue
        group_stats = (
            group_cells.groupby('pressure_condition')[orientation_col]
            .agg(['mean', 'median', 'std', 'count'])
            .reset_index()  # make pressure_condition a regular column
        )
        group_stats['Cell Group'] = group_label
        all_orientation_stats.append(group_stats)

    if all_orientation_stats:
        # ignore_index gives the stacked table unique row labels instead of
        # repeating 0..k-1 for every group.
        final_orientation_stats_df = pd.concat(all_orientation_stats, ignore_index=True)
        # Reorder columns for clarity
        cols_order = ['pressure_condition', 'Cell Group', 'mean', 'median', 'std', 'count']
        final_orientation_stats_df = final_orientation_stats_df[cols_order]

        print("\n--- Orientation Statistics for Mechanoadaptive Images (Grouped by Pressure) ---")
        print(final_orientation_stats_df)

        orientation_stats_output_path = os.path.join(base_path, "orientation_stats_mechanoadaptive_images_by_pressure.csv")
        final_orientation_stats_df.to_csv(orientation_stats_output_path, index=False)
        print(f"Orientation statistics (grouped by pressure) saved to: {orientation_stats_output_path}")
    else:
        print("No orientation statistics could be calculated and grouped by pressure.")

print("\nAnalysis complete.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.

Starting data loading process...
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x40/Senescence_Results/cell_classification_rule_based_full.csv
Successfully loaded data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x40/Senescence_Results/cell_classification_rule_based_full.csv (Pressure: 0Pa, Mag: x40)
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/cell_classification_rule_based_full.csv
Successfully loaded data from: /content/drive/MyDrive/knowledge/University/Master/Thesis/Analysis/Static-x20/Senescence_Results/cell_classification_rule_based_full.csv (Pressure: 0Pa, Mag: x20)
Attempting to load data from: /content/drive/MyDrive/knowledge/University/Master