In [None]:
# %% Import Libraries
import os
import re
import gc
import torch
import numpy as np
import tifffile
import pandas as pd
from nd2reader import ND2Reader
from tqdm import tqdm
from cellpose import models

# %% Function Definitions

def convert_nd2_to_tiff_channel(nd2_path, tiff_output_dir, channel_name):
    """
    Converts one channel of an ND2 file to TIFF file(s).

    When the file has a z-axis, each saved image is the maximum intensity
    projection over z. When it has no z-axis (single-plane acquisition), the
    plane is written unchanged instead of failing on a missing axis.

    Parameters:
        nd2_path: Path of the ND2 file to read.
        tiff_output_dir: Directory the TIFF file(s) are written to.
        channel_name: Channel to extract; must match an entry of the
            'channels' list in the ND2 metadata exactly.

    Output naming:
        "<nd2 base name>_t<idx>.tiff" for each time frame when the file has
        more than one time point, otherwise a single "<nd2 base name>.tiff".

    Returns:
        None. If the channel is absent the file is skipped with a message.
        Any exception is caught and reported with print (best-effort batch
        behaviour), so one bad file does not stop the caller's loop.
    """
    try:
        with ND2Reader(nd2_path) as img:
            print(f"\nProcessing ND2 file: {os.path.basename(nd2_path)}")
            print("Detected Axes:", img.axes)
            print("Detected Shape:", img.shape)
            channels = img.metadata.get('channels')
            if channels is None:
                raise ValueError("No channel information found in ND2 metadata.")
            print("Channels in file:", channels)
            if channel_name not in channels:
                print(f"Channel '{channel_name}' not found in {nd2_path}. Skipping conversion for this channel.")
                return
            channel_index = channels.index(channel_name)
            print(f"Using channel '{channel_name}' at index {channel_index}.")
            img.default_coords['c'] = channel_index

            # These do not change while iterating, so evaluate them once.
            has_time = 't' in img.axes and img.sizes.get('t', 1) > 1
            has_z = 'z' in img.axes
            base_name = os.path.splitext(os.path.basename(nd2_path))[0]

            # Iterate over time when there are several time points; otherwise
            # the reader yields a single bundled frame.
            img.iter_axes = 't' if has_time else ''
            # Bundling an axis the file does not have raises, so only ask for
            # 'z' when it is actually present.
            img.bundle_axes = 'zyx' if has_z else 'yx'

            for idx, frame in enumerate(img):
                # With z bundled the frame is (z, y, x): collapse z by maximum.
                # Without z the frame is already a single (y, x) plane.
                projection = np.max(frame, axis=0) if has_z else np.asarray(frame)
                if has_time:
                    tiff_filename = f"{base_name}_t{idx}.tiff"
                else:
                    tiff_filename = f"{base_name}.tiff"
                tiff_path = os.path.join(tiff_output_dir, tiff_filename)
                tifffile.imwrite(tiff_path, projection)
                print(f"Saved TIFF: {tiff_path}")
    except Exception as e:
        print(f"Error converting {nd2_path} for channel {channel_name} to TIFF: {e}")

def generate_cellmask(tiff_path, mask_path, model, diameter=130,
                      flow_threshold=0.4, cellprob_threshold=0.0):
    """
    Generates a cell mask from the given TIFF image using Cellpose and saves the mask.

    The image is read, reduced to 2D if needed (a 3D stack is averaged over
    its first axis), min-max normalized to [0, 1] as float32, and passed to
    ``model.eval``. The first value returned by ``model.eval`` (the label
    image) is written to ``mask_path`` as uint16.

    Parameters:
        tiff_path: Path of the input TIFF image.
        mask_path: Path the mask TIFF is written to.
        model: Object exposing a Cellpose-style ``eval`` method.
        diameter: Cell diameter passed to ``model.eval`` (default 130).
        flow_threshold: Flow threshold passed to ``model.eval`` (default 0.4).
        cellprob_threshold: Cell-probability threshold passed to
            ``model.eval`` (default 0.0).

    Returns:
        None. Images with zero dynamic range or non-finite values are skipped
        with a warning. Any exception is caught and reported with print, so a
        single bad image does not abort a batch. Garbage collection and (when
        CUDA is available) a CUDA cache flush always run afterwards.
    """
    try:
        img = tifffile.imread(tiff_path)
        # If the image is 3D (a stack), average over the first axis to obtain a 2D image.
        if img.ndim == 3:
            img = img.mean(axis=0)
        elif img.ndim != 2:
            raise ValueError(f"Unexpected number of dimensions ({img.ndim}) in image: {tiff_path}")

        # Convert to float before measuring the range: max - min evaluated in a
        # signed-integer dtype can wrap around and yield a bogus (negative) range.
        img = img.astype(np.float32)
        img_min, img_max = np.min(img), np.max(img)
        dynamic_range = img_max - img_min
        if dynamic_range == 0:
            print(f"Warning: Image {tiff_path} has zero dynamic range. Skipping mask generation.")
            return
        img = (img - img_min) / dynamic_range

        # NaN/inf in the input propagate through the normalization; catch them here.
        if not np.isfinite(img).all():
            print(f"Warning: Image {tiff_path} contains non-finite values after normalization. Skipping.")
            return

        # Only the label image is needed. Taking just the first returned value
        # keeps this working whether eval returns three or four items.
        masks, *_ = model.eval(
            img,
            diameter=diameter,
            flow_threshold=flow_threshold,
            cellprob_threshold=cellprob_threshold,
            channels=[0, 0]
        )
        tifffile.imwrite(mask_path, masks.astype(np.uint16))
        print(f"Saved cell mask: {mask_path}")
    except Exception as e:
        print(f"Error generating mask for {tiff_path}: {e}")
    finally:
        # Release memory between images; runs on every exit path, including skips.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

def calculate_intensities(tiff_files, tiff_dirs, mask_dir, output_csv):
    """
    Calculates the average GFP and DSred intensity of every masked cell and
    saves the result to a CSV file.

    For each name in ``tiff_files`` the GFP image, the DSred image and the
    mask "<name without extension>_mask.tiff" are loaded. Every non-zero
    label in the mask is treated as one cell, and the mean of the pixels
    under that label is taken in both images.

    Parameters:
        tiff_files: File names (not paths) to process; the same name is
            looked up in both channel directories.
        tiff_dirs: Mapping with keys "GFP" and "DSred" giving the directory
            of each channel's TIFF files.
        mask_dir: Directory containing the mask TIFF files.
        output_csv: Path of the CSV file to write.

    Returns:
        None. Files with a missing image/mask, or whose image shape does not
        match the mask, are skipped with a message. The CSV always contains
        the header row, even when no cell was measured.
    """
    columns = [
        "File Name",
        "Mask Index",
        "Average GFP Intensity",
        "Average DSred Intensity",
    ]
    data = []
    for tiff_file in tqdm(tiff_files, desc="Calculating Intensities"):
        gfp_path = os.path.join(tiff_dirs["GFP"], tiff_file)
        dsred_path = os.path.join(tiff_dirs["DSred"], tiff_file)
        mask_filename = os.path.splitext(tiff_file)[0] + "_mask.tiff"
        mask_path = os.path.join(mask_dir, mask_filename)

        if not os.path.exists(gfp_path):
            print(f"GFP image {gfp_path} not found. Skipping.")
            continue
        if not os.path.exists(dsred_path):
            print(f"DSred image {dsred_path} not found. Skipping.")
            continue
        if not os.path.exists(mask_path):
            print(f"Mask image {mask_path} not found. Skipping.")
            continue

        gfp_img = tifffile.imread(gfp_path)
        dsred_img = tifffile.imread(dsred_path)
        mask_img = tifffile.imread(mask_path)

        # Boolean indexing needs the mask shape to match the image's leading
        # dimensions. Skip this file rather than raising and losing every
        # row collected so far.
        mismatched = [
            label
            for label, image in (("GFP", gfp_img), ("DSred", dsred_img))
            if image.shape[:mask_img.ndim] != mask_img.shape
        ]
        if mismatched:
            labels = "/".join(mismatched)
            print(f"Shape of {labels} image does not match mask {mask_path} "
                  f"{mask_img.shape} for {tiff_file}. Skipping.")
            continue

        mask_indices = np.unique(mask_img)
        mask_indices = mask_indices[mask_indices != 0]  # exclude background

        for idx in mask_indices:
            cell_mask = (mask_img == idx)
            data.append({
                "File Name": tiff_file,
                "Mask Index": int(idx),
                "Average GFP Intensity": np.mean(gfp_img[cell_mask]),
                "Average DSred Intensity": np.mean(dsred_img[cell_mask]),
            })

    # Fixing the columns guarantees a header row even when `data` is empty,
    # so the CSV can always be read back.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False)
    print(f"Cell-level intensity data saved to {output_csv}")

def extract_info_from_filename(filename):
    """
    Extracts group (Act/Rep), sample (e.g., 01), and repeat (e.g., 002) from the TIFF filename.

    The extension is dropped and any "_t<digits>" time marker is removed
    before matching. The name must then start with
    "<Act|Rep>_<2 digits>_<3 digits>"; the group is matched case-insensitively.

    Returns:
        A (group, sample, repeat) tuple of strings. The group is normalized
        to "Act" or "Rep" regardless of the casing used in the file name, so
        that e.g. "act_01_001" and "Act_01_002" end up in the same group.
        ('Unknown', 'Unknown', 'Unknown') is returned when the name does not
        match.
    """
    base = re.sub(r'_t\d+', '', os.path.splitext(filename)[0])
    pattern = re.compile(r'^(Act|Rep)_(\d{2})_(\d{3})', re.IGNORECASE)
    m = pattern.search(base)
    if not m:
        return 'Unknown', 'Unknown', 'Unknown'
    # The match ignores case, so canonicalize the group label ("ACT" -> "Act").
    return m.group(1).capitalize(), m.group(2), m.group(3)

def run_conversion(nd2_dir, dsred_tiff_dir, gfp_tiff_dir):
    """
    Runs the ND2-to-TIFF conversion for every ND2 file in nd2_dir.

    Each file is converted twice: the '60x DSRED' channel is written to
    dsred_tiff_dir and the '60x GFP' channel to gfp_tiff_dir. If the
    directory holds no file ending in ".nd2" (case-insensitive), a message is
    printed and nothing else happens.
    """
    nd2_names = list(filter(lambda name: name.lower().endswith('.nd2'),
                            os.listdir(nd2_dir)))
    if not nd2_names:
        print("No ND2 files found in", nd2_dir)
        return

    print(f"Found {len(nd2_names)} ND2 file(s) for conversion.")
    # (channel name, destination directory), in the order they are converted.
    targets = (
        ('60x DSRED', dsred_tiff_dir),
        ('60x GFP', gfp_tiff_dir),
    )
    for name in nd2_names:
        source_path = os.path.join(nd2_dir, name)
        print(f"\n--- Processing: {name} ---")
        for channel, destination in targets:
            convert_nd2_to_tiff_channel(source_path, destination, channel)

def run_mask_generation(dsred_tiff_dir, mask_dir, model):
    """
    Creates a cell mask for every DSred TIFF file in dsred_tiff_dir.

    Files ending in ".tiff"/".tif" (case-insensitive) whose name does not
    contain "_mask" are processed; each mask is written to mask_dir as
    "<name without extension>_mask.tiff". If no such file exists, a message
    is printed and nothing else happens.
    """
    tiff_suffixes = ('.tiff', '.tif')
    sources = []
    for entry in os.listdir(dsred_tiff_dir):
        if entry.lower().endswith(tiff_suffixes) and '_mask' not in entry:
            sources.append(entry)

    if not sources:
        print("No DSred TIFF files found in", dsred_tiff_dir)
        return

    print(f"Generating cell masks for {len(sources)} DSred TIFF file(s)...")
    for source_name in tqdm(sources, desc="Generating Masks"):
        stem = os.path.splitext(source_name)[0]
        generate_cellmask(
            os.path.join(dsred_tiff_dir, source_name),
            os.path.join(mask_dir, stem + "_mask.tiff"),
            model,
        )
    print("Cell mask generation completed.")

def run_calculation(dsred_tiff_dir, gfp_tiff_dir, mask_dir, output_dir):
    """
    Calculates intensities, writes a detailed CSV, then creates an Excel file with two worksheets:
      1. Detailed Cell Data: one row per cell with Group, Sample, Repeat, Mask Index,
         GFP_Mean, DSred_Mean, cell_order (in that column order).
      2. Wide Data: pivoted data so that each unique (Group, Sample, Repeat) is one row,
         and individual cell measurements become separate columns.

    Parameters:
        dsred_tiff_dir: Directory with the DSred TIFF files; its file names
            drive the whole calculation.
        gfp_tiff_dir: Directory with the GFP TIFF files of the same names.
        mask_dir: Directory with the "<name>_mask.tiff" mask files.
        output_dir: Directory receiving "cell_intensities.csv" and
            "analysis_output.xlsx".

    Returns:
        None. If no DSred TIFF file is found, or no cell was measured, a
        message is printed and no Excel file is written. Any other failure
        while building the Excel file is caught and reported with print.
    """
    dsred_tiff_files = [f for f in os.listdir(dsred_tiff_dir)
                        if f.lower().endswith(('.tiff', '.tif')) and '_mask' not in f]
    if not dsred_tiff_files:
        print("No DSred TIFF files found in", dsred_tiff_dir)
        return
    output_csv = os.path.join(output_dir, "cell_intensities.csv")
    tiff_dirs = {"GFP": gfp_tiff_dir, "DSred": dsred_tiff_dir}
    calculate_intensities(dsred_tiff_files, tiff_dirs, mask_dir, output_csv)

    try:
        # Read the detailed cell-level data from the CSV. A CSV without any
        # column (nothing measured) raises EmptyDataError; treat it like a
        # header-only file instead of reporting a generic failure.
        try:
            df = pd.read_csv(output_csv)
        except pd.errors.EmptyDataError:
            df = pd.DataFrame()
        if df.empty:
            print("No cell measurements were recorded; skipping Excel output.")
            return

        # Extract Group, Sample, and Repeat from the File Name.
        # (Assumes filenames like "Act_01_001.tiff" or "Rep_12_003.tiff")
        # Built in one step rather than creating a pandas Series per row.
        key_columns = ['Group', 'Sample', 'Repeat']
        info = pd.DataFrame(
            [extract_info_from_filename(name) for name in df['File Name']],
            columns=key_columns,
            index=df.index,
        )

        # Convert Sample and Repeat to integers to remove leading zeros;
        # non-numeric values such as 'Unknown' are kept as they are.
        for col in ('Sample', 'Repeat'):
            info[col] = [int(v) if v.isdigit() else v for v in info[col]]

        # Put the identifying columns first, drop the File Name column since
        # it is no longer needed, and shorten the intensity column names.
        df = pd.concat([info, df.drop(columns=["File Name"])], axis=1)
        df = df.rename(columns={
            "Average GFP Intensity": "GFP_Mean",
            "Average DSred Intensity": "DSred_Mean"
        })

        # Number the cells within each (Group, Sample, Repeat) group for pivoting.
        df['cell_order'] = df.groupby(key_columns).cumcount() + 1

        # Wide format: one row per (Group, Sample, Repeat), one column per
        # cell measurement. cell_order is unique within a group, so the
        # default aggregation leaves the individual values unchanged.
        df_wide = df.pivot_table(
            index=key_columns,
            columns='cell_order',
            values=['GFP_Mean', 'DSred_Mean']
        )

        # Flatten the MultiIndex columns. The resulting columns will be like "GFP_Mean_1", "GFP_Mean_2", etc.
        df_wide.columns = [f"{col[0]}_{col[1]}" for col in df_wide.columns]
        df_wide.reset_index(inplace=True)

        # Write both the detailed and wide data to an Excel file with separate worksheets.
        output_excel = os.path.join(output_dir, "analysis_output.xlsx")
        with pd.ExcelWriter(output_excel, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name="Detailed Cell Data", index=False)
            df_wide.to_excel(writer, sheet_name="Wide Data", index=False)
        print("Excel file with Detailed Cell Data and Wide Data saved to", output_excel)
    except Exception as e:
        print("Error generating Excel output:", e)

# %% Call the Functions Directly (Without argparse)
# Directory layout: every result is written beneath <nd2_dir>/Output.
# Change nd2_dir to point the pipeline at another data set.
nd2_dir = r"A:\_Ongoing\20250128_ActRep_version2_24well"
output_dir = os.path.join(nd2_dir, "Output")
dsred_tiff_dir, gfp_tiff_dir, mask_dir = (
    os.path.join(output_dir, subfolder)
    for subfolder in ("60x_DSRED_TIFF", "60x_GFP_TIFF", "mask")
)

# Make sure every output folder exists before a step writes into it.
for _folder in (output_dir, dsred_tiff_dir, gfp_tiff_dir, mask_dir):
    os.makedirs(_folder, exist_ok=True)

# Step 1: convert both channels of every ND2 file to TIFF.
print("\n=== ND2-to-TIFF Conversion ===")
run_conversion(nd2_dir, dsred_tiff_dir, gfp_tiff_dir)

# Step 2: build the Cellpose model (GPU when CUDA is available) and generate
# the masks. If building the cyto3 model fails, a CPU cyto2 model is used.
print("\n=== Cell Mask Generation ===")
try:
    model = models.Cellpose(model_type="cyto3", gpu=torch.cuda.is_available())
    print("Cellpose model initialized.")
except Exception as e:
    print("Error initializing Cellpose model:", e)
    print("Falling back to CPU mode with cyto2.")
    model = models.Cellpose(model_type="cyto2", gpu=False)
run_mask_generation(dsred_tiff_dir, mask_dir, model)

# Step 3: per-cell intensities, CSV and Excel output (detailed and wide data).
print("\n=== Intensity Calculation and Excel Output ===")
run_calculation(dsred_tiff_dir, gfp_tiff_dir, mask_dir, output_dir)


In [None]:
import os
import re
import pandas as pd

def extract_info_from_filename(filename):
    """
    Extracts group (Act/Rep), sample (e.g., 01), and repeat (e.g., 001) from a filename.
    For example, given "Act_01_001.tiff" it returns ("Act", "01", "001").

    The extension and any "_t<digits>" time marker are removed first. The
    group is matched case-insensitively and returned as "Act" or "Rep"
    whatever casing the file name uses, so differently-cased names are not
    split into separate groups. Names that do not start with
    "<Act|Rep>_<2 digits>_<3 digits>" yield ('Unknown', 'Unknown', 'Unknown').
    """
    stem, _extension = os.path.splitext(filename)
    # Remove any time suffix if present (e.g., _t0)
    stem = re.sub(r'_t\d+', '', stem)
    found = re.match(r'(Act|Rep)_(\d{2})_(\d{3})', stem, re.IGNORECASE)
    if found is None:
        return 'Unknown', 'Unknown', 'Unknown'
    group, sample, repeat = found.groups()
    # Canonicalize the label because the match above ignores case.
    return group.capitalize(), sample, repeat

# Location of the previously generated CSV file with cell intensities
csv_file = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\cell_intensities.csv"

# Load the CSV. It is expected to provide these columns:
# "File Name", "Mask Index", "Average GFP Intensity", "Average DSred Intensity"
df = pd.read_csv(csv_file)

# Split every "File Name" into its Group, Sample and Repeat parts.
df[['Group', 'Sample', 'Repeat']] = df['File Name'].apply(
    lambda name: pd.Series(extract_info_from_filename(name))
)

# Sample and Repeat are kept as text. To strip the leading zeros by turning
# them into integers, uncomment the following lines:
# df['Sample'] = df['Sample'].apply(lambda x: int(x) if x.isdigit() else x)
# df['Repeat'] = df['Repeat'].apply(lambda x: int(x) if x.isdigit() else x)

# Drop "File Name", shorten the intensity column names, and put the
# identifying columns (Group, Sample, Repeat) first.
df = (
    df.drop(columns=["File Name"])
    .rename(columns={
        "Average GFP Intensity": "GFP_Mean",
        "Average DSred Intensity": "DSred_Mean",
    })
    [['Group', 'Sample', 'Repeat', 'Mask Index', 'GFP_Mean', 'DSred_Mean']]
)

# Destination of the Excel summary (a single worksheet)
excel_output = r"A:\_Ongoing\20250128_ActRep_version2_24well\Output\name_summary.xlsx"
df.to_excel(excel_output, index=False)

print("New Excel summary saved to", excel_output)
