In [None]:
"""
Step 1: Group CSV Files by Image and Combine into Excel
-------------------------------------------------

This script combines raw ROI mean intensity values from four-channel calcium imaging experiments 
processed using the CARMEN Fiji macro (CARMEN_MeasureFourChannelROI.ijm). 
It scans a folder of CSV exports (one file per image and channel), groups them by base name, 
and merges the values into a single structured Excel file.

Functionality:
- Scans a folder for CSV files matching the naming convention (e.g., "Image01_Intensity FP1.csv").
- Groups CSVs by image base name, ensuring each group contains exactly four files (FP1 to FP4).
- Loads each CSV and writes its data into a separate sheet in an Excel file.
- Saves the output as "<base_name>_analysis.xlsx" in the same folder.

Assumptions:
- CSV files follow the naming convention: "<base_name>_Intensity FPX.csv".
- Each CSV represents one fluorescence channel (BFP, GFP, RFP, NIR).
- Files are stored together in a single input directory, set by `file_path`.

Outputs:
- One Excel file per image group: "<base_name>_analysis.xlsx"
  - Contains four sheets named after the CSV suffix ("Intensity FP1" to "Intensity FP4") — each corresponding to one channel.

Usage:
- Set the `file_path` variable to the folder containing your CARMEN CSV exports.
- Run the script in a Python environment with `pandas` installed.
"""
import pandas as pd
import os
from collections import defaultdict

# Define the path to your CSV files
file_path = r"your\file\path"  # Replace with actual path to your CARMEN CSV output

# Set suffix for the resulting Excel files
excel_suffix = '_analysis'


def _channel_number(csv_name):
    """Return the trailing channel number of a CSV file name, or None.

    Example: 'Image01_Intensity FP3.csv' -> 3. None is returned when the name
    (without its extension) does not end in a digit, so callers can skip the
    group instead of crashing on int().
    """
    stem = os.path.splitext(csv_name)[0]
    digits = stem[len(stem.rstrip('0123456789')):]
    return int(digits) if digits else None


# Initialize dictionary to group files by base name (i.e., per image)
grouped_files = defaultdict(list)

# Group CSVs by base name: e.g., "Image01" from "Image01_Intensity FP1.csv".
# sorted() makes the processing order independent of the OS directory order.
for file in sorted(os.listdir(file_path)):
    # Case-insensitive match so exports named "*.CSV" are not silently skipped
    if file.lower().endswith(".csv"):
        # Everything before the last underscore identifies the image
        base_name = file.rsplit('_', 1)[0]
        grouped_files[base_name].append(file)

# Paths of all Excel files created by this run
excel_file_paths = []

# Loop through each group and combine into an Excel file
for base_name, files in grouped_files.items():
    # Only proceed if there are exactly 4 files (assumes 4 fluorescence planes)
    if len(files) != 4:
        print(f"Warning: {base_name} does not have exactly 4 corresponding CSV files.")
        continue

    # The FP1 -> FP4 order is derived from the trailing number; without it the
    # group cannot be ordered, so skip it instead of aborting the whole batch.
    if any(_channel_number(name) is None for name in files):
        print(f"Warning: {base_name} has CSV files without a trailing channel number; skipped.")
        continue
    files.sort(key=_channel_number)

    # Load each CSV into a dictionary keyed by the text after the last
    # underscore (e.g. "Intensity FP1"); these keys become the sheet names.
    csv_files = {}
    for file in files:
        suffix = os.path.splitext(file)[0].rsplit('_', 1)[1]
        full_path = os.path.join(file_path, file)
        csv_files[suffix] = pd.read_csv(full_path)

    # Create a new Excel workbook for this image
    output_excel_path = os.path.join(file_path, f'{base_name}{excel_suffix}.xlsx')
    with pd.ExcelWriter(output_excel_path) as writer:
        for sheet_name, df in csv_files.items():
            # Each CSV becomes one sheet in the Excel file
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    excel_file_paths.append(output_excel_path)
    print(f"CSV files for {base_name} successfully saved to {base_name}{excel_suffix}.xlsx.")


In [None]:
"""
Step 2: Load Template, Transfer Data from CARMEN Sheets, and Clean Up
-----------------------------------------------------------------

This script takes the structured Excel output files from STEP 1 and copies
their contents into a macro-enabled Excel template (CARMEN_template.xlsm). It maps each 
fluorescence channel to a specific sheet in the template and prepares the data for further 
processing such as bleach curve fitting.

Functionality:
- Opens each "<base_name>_analysis.xlsx" file and the CARMEN Excel template.
- Copies headers and ROI intensity data from FP1 to FP4 into the corresponding template sheets:
  - FP1 → BFP
  - FP2 → GFP
  - FP3 → RFP
  - FP4 → NIR
- Additionally copies the last column of each ROI dataset (e.g., background or selected value) into column 'BD'.
- Cleans up all template sheets by deleting rows below the last valid data row.
- Saves a new macro-enabled Excel file: "<base_name>_analysis_output.xlsm".

Assumptions:
- Input files follow the naming convention: "<base_name>_analysis.xlsx".
- A working macro-enabled Excel template file (CARMEN_template.xlsm) is available.
- Data in each input sheet begins at cell C1 (headers) and C2 (data).
- Output sheets in the template are named 'BFP', 'GFP', 'RFP', and 'NIR'.

Outputs:
- For each image group, saves: "<base_name>_analysis_output.xlsm"
  - Structured and cleaned Excel template with inserted raw intensity data.

Usage:
- Set `template_folder` and `results_folder` to the correct paths.
- Ensure `CARMEN_template.xlsm` exists and ends with `.xlsm`.
- Run the script in a Python environment with `xlwings` installed.
"""

import xlwings as xw
import os

# Define paths – adjust to your directory structure
template_folder = r"your\file\path\template"
results_folder = r"your\file\path"
template_file = os.path.join(template_folder, "CARMEN_template.xlsm")

# The output is saved as ".xlsm", so the template itself must be macro-enabled.
# Fail early with a clear message rather than part-way through the batch.
if not template_file.lower().endswith(".xlsm"):
    raise ValueError(f"Template must be a macro-enabled .xlsm file: {template_file}")

# Map raw data sheets (FP1–FP4) to their corresponding template target sheets
sheet_mappings = {
    "Intensity FP1": "BFP",
    "Intensity FP2": "GFP",
    "Intensity FP3": "RFP",
    "Intensity FP4": "NIR"
}

# Loop through all result Excel files that match the CARMEN format
for file_name in os.listdir(results_folder):
    if not file_name.endswith("_analysis.xlsx"):
        continue
    data_file_path = os.path.join(results_folder, file_name)

    # Launch Excel in background mode (no UI, no alerts)
    app = xw.App(visible=False)
    try:
        app.display_alerts = False
        app.screen_updating = False

        # Open the analysis result file and the template workbook
        template = app.books.open(template_file)
        data_file = app.books.open(data_file_path)
        available_sheets = {sheet.name for sheet in data_file.sheets}

        # Track the deepest row of data to clean up template below it
        last_data_row = 0

        for data_sheet, template_sheet in sheet_mappings.items():
            try:
                # Check if expected sheet is present in the analysis file
                if data_sheet not in available_sheets:
                    print(f"Sheet '{data_sheet}' not found in {file_name}")
                    continue

                data_sheet_obj = data_file.sheets[data_sheet]
                # Header row starts at C1; read the whole row, not just the first cell.
                # ndim=1 keeps this a list even when there is a single header.
                headers = data_sheet_obj.range('C1').expand('right').options(ndim=1).value
                # Data starts at C2; ndim=2 guarantees a list of rows even for a
                # single row or single cell, so len() always counts rows.
                data_range = data_sheet_obj.range('C2').expand('table').options(ndim=2).value

                if not data_range or all(val is None for row in data_range for val in row):
                    print(f"No data found in {data_sheet} in {file_name}")
                    continue

                # Track the longest data section (+1 for the header row)
                last_data_row = max(last_data_row, len(data_range) + 1)

                # Write header and data to corresponding sheet in the template (starting in column G)
                template_sheet_obj = template.sheets[template_sheet]
                template_sheet_obj.range('G1').value = [headers]           # Headers in row 1
                template_sheet_obj.range('G2').value = data_range          # Data starting in row 2

                # Copy the last column of each data row into column BD
                template_sheet_obj.range('BD2').value = [[row[-1]] for row in data_range]

                print(f"Processed {data_sheet} in {file_name}")

            except Exception as e:
                print(f" Error processing {data_sheet} in {file_name}: {e}")

        if last_data_row == 0:
            # Without this guard the cleanup below would delete rows 1..N of every
            # template sheet and an emptied template would be saved as the result.
            print(f"No data transferred from {file_name}; output file not written")
            continue

        # Cleanup: Remove leftover rows from all sheets below the data region
        for sheet in template.sheets:
            try:
                used = sheet.api.UsedRange
                # UsedRange need not start at row 1, so compute its last row index
                # instead of using the row count alone.
                max_rows = used.Row + used.Rows.Count - 1
                if last_data_row < max_rows:
                    # Delete extra rows to avoid miscalculations or leftover formulas
                    sheet.api.Rows(f"{last_data_row + 1}:{max_rows}").Delete()
                print(f"Cleaned extra rows in sheet '{sheet.name}'")
            except Exception as e:
                print(f" Error cleaning rows in sheet '{sheet.name}': {e}")

        # Save template as new file, preserving macros and structure
        output_file_path = os.path.join(results_folder, f"{os.path.splitext(file_name)[0]}_output.xlsm")
        template.save(output_file_path)

        template.close()
        data_file.close()
    finally:
        # Always shut down the hidden Excel instance, even when a step above
        # fails, so no invisible Excel process is left running.
        app.quit()


In [None]:
"""
STEP 3: CARMEN Calcium Multiplexing Bleaching Correction and Curve Fitting
-------------------------------------------------------------------

This script performs exponential decay fitting on time course data from four-channel calcium imaging experiments 
prepared using the CARMEN Fiji macro and transferred into structured Excel templates. It assumes that each image 
has already been processed through background subtraction and inserted into a macro-enabled Excel file.

Functionality:
- Opens each structured `.xlsm` file and reads the 'Bleaching' sheet containing time and signal data.
- Scales time values to improve numerical stability during fitting.
- Removes local outliers using a sliding window method.
- Fits an exponential decay model to each ROI time trace using `scipy.optimize.curve_fit`.
- Writes the fitted curves into corresponding output sheets (BFP, GFP, RFP, NIR) starting from column 'DF'.

Assumptions:
- Excel files follow the naming convention: "<base_name>_analysis_output.xlsm".
- The 'Bleaching' sheet contains time data in column A and signal intensities in defined column blocks:
  - BFP: columns D to BA
  - GFP: columns BC to CZ
  - RFP: columns DB to EY
  - NIR: columns FA to GX
- Time values are in seconds or another uniform unit and appear without missing rows.
- The last row of valid data is automatically detected (NaNs, empty cells mark the end).
- The Excel template already contains output sheets named 'BFP', 'GFP', 'RFP', and 'NIR'.

Outputs:
- For each input file, overwrites the original .xlsm file with:
  - Fitted decay curves for each ROI written into the respective channel sheet starting from column 'DF'.
  - Cleaned-up 'Bleaching' sheet with extraneous rows removed below valid data.

Usage:
- Manually inspect and adjust time windows and bleach start in the Excel file before running.
- Set `results_folder` to the path containing your CARMEN `.xlsm` files.
- Run the script in a Python environment with `xlwings`, `numpy`, and `scipy` installed.
"""

import xlwings as xw
import os
import numpy as np
from scipy.optimize import curve_fit

# Folder that is scanned for "*_analysis_output.xlsm" workbooks (the files saved by Step 2).
# Replace the placeholder with the actual path before running.
results_folder = r"your\file\path"

# Single-exponential model used as the target function for curve fitting
def exp_decay(x, a, b, c):
    """Evaluate a * exp(-b * x) + c.

    x may be a scalar or a NumPy array; a is the amplitude, b the decay rate
    and c the constant offset.
    """
    decay_term = np.exp(-b * x)
    return a * decay_term + c

# Local outlier removal: compares each point to surrounding values in a window
def remove_outliers_local_window(data, window_size=3, threshold=2.5):
    """Return a float copy of *data* with local outliers replaced by NaN.

    Each point is compared with up to `window_size` neighbours on either side
    (the point itself is excluded, NaN/inf neighbours are ignored). It is
    replaced by NaN when it lies more than `threshold` standard deviations from
    the neighbours' mean. Neighbour statistics always use the original values,
    not values already filtered in this pass.

    Parameters:
        data: 1D sequence or array of numbers; it is not modified.
        window_size: number of neighbours considered on each side.
        threshold: allowed deviation in multiples of the local standard deviation.

    Returns:
        numpy.ndarray of dtype float with the same length as `data`.
    """
    # Work on a float array so integer input can hold NaN and lists are accepted
    data = np.array(data, dtype=float)
    filtered_data = data.copy()
    n = len(data)

    for i in range(n):
        start = max(0, i - window_size)
        end = min(n, i + window_size + 1)
        surrounding_points = np.concatenate([data[start:i], data[i + 1:end]])
        surrounding_points = surrounding_points[np.isfinite(surrounding_points)]  # Ignore NaNs

        # A single neighbour always has a standard deviation of 0, which would
        # flag every point that differs from it; require at least two.
        if len(surrounding_points) < 2:
            continue

        local_mean = np.mean(surrounding_points)
        local_std = np.std(surrounding_points)
        if abs(data[i] - local_mean) > threshold * local_std:
            filtered_data[i] = np.nan

    return filtered_data

# Input column blocks on the 'Bleaching' sheet for each channel (column start → end)
fp_data_ranges = {
    'BFP': ('D', 'BA'),
    'GFP': ('BC', 'CZ'),
    'RFP': ('DB', 'EY'),
    'NIR': ('FA', 'GX')
}

# Mapping: output sheet name for each fluorescence protein
fp_sheets = {
    'BFP': 'BFP',
    'GFP': 'GFP',
    'RFP': 'RFP',
    'NIR': 'NIR'
}

# First output column for fitted curves: column DF.
# xlwings (row, column) tuples are 1-based, so DF is column 110.
save_start_col = 110

# exp_decay has three free parameters, so a fit needs at least three points
min_fit_points = 3

# Loop through each .xlsm file that contains bleaching data to fit
for file_name in os.listdir(results_folder):
    if not file_name.endswith("_analysis_output.xlsm"):
        continue
    data_file_path = os.path.join(results_folder, file_name)

    # Launch Excel invisibly
    app = xw.App(visible=False)
    workbook = None
    try:
        app.display_alerts = False
        app.screen_updating = False

        # Opened inside the try block so Excel is still shut down if this fails
        workbook = app.books.open(data_file_path)

        # Access the bleaching data sheet
        bleaching_sheet = workbook.sheets['Bleaching']

        # Extract time data from column A, reading only down to the last used
        # row instead of the full height of the worksheet.
        last_used_row = bleaching_sheet.used_range.last_cell.row
        time_values = bleaching_sheet.range(f'A2:A{max(last_used_row, 2)}').options(ndim=1).value
        time_data = np.array(
            [value if isinstance(value, (int, float)) else np.nan for value in time_values],
            dtype=float,
        )

        # The first non-numeric/empty cell marks the end of the valid data
        invalid_positions = np.flatnonzero(np.isnan(time_data))
        last_data_row = int(invalid_positions[0]) if invalid_positions.size else len(time_data)
        time_data = time_data[:last_data_row]

        if last_data_row == 0:
            print(f"No time data found in column A of 'Bleaching' in {file_name}")
            continue

        # Normalise time values for numerical stability
        time_data_max = np.max(time_data)
        time_data_scaled = time_data / time_data_max if time_data_max > 0 else time_data

        # Clean up rows below the data range (row 1 is the header, data occupies
        # rows 2 .. last_data_row + 1)
        first_stale_row = last_data_row + 2
        if first_stale_row <= last_used_row:
            bleaching_sheet.api.Rows(f"{first_stale_row}:{last_used_row}").Delete()

        # Loop through each channel and fit decay curves column-wise
        for fp, (start_col, end_col) in fp_data_ranges.items():
            # ndim=2 keeps a list of rows even when only one data row exists
            raw_block = bleaching_sheet.range(
                f'{start_col}2:{end_col}{last_data_row + 1}'
            ).options(ndim=2).value
            data_range = np.array(
                [[val if isinstance(val, (int, float)) else np.nan for val in row] for row in raw_block],
                dtype=float,
            )

            output_sheet = workbook.sheets[fp_sheets[fp]]

            for col_idx in range(data_range.shape[1]):
                col_data = data_range[:, col_idx]
                save_col = save_start_col + col_idx
                # Output cells for this ROI: one cell per time point
                fit_cells = output_sheet.range((2, save_col), (last_data_row + 1, save_col))

                # Remove local outliers
                col_data_no_outliers = remove_outliers_local_window(col_data)

                # Skip columns whose values are all non-positive
                if np.all(col_data_no_outliers <= 0):
                    print(f"Skipping column index {col_idx} in {fp_sheets[fp]} due to non-positive values")
                    fit_cells.clear_contents()
                    continue

                # Mask valid points for fitting
                valid_mask = np.isfinite(time_data) & np.isfinite(col_data_no_outliers)
                valid_time = time_data_scaled[valid_mask]
                valid_col_data = col_data_no_outliers[valid_mask]

                # Ensure sufficient data points
                if len(valid_time) < min_fit_points:
                    print(f"Skipping column index {col_idx} in {fp_sheets[fp]} due to insufficient valid data")
                    fit_cells.clear_contents()
                    continue

                # Fit exponential decay to valid data points
                try:
                    p0 = [400, 0.003, 300]  # Initial guess for [a, b, c]
                    bounds = ([0, 0, -np.inf], [np.inf, np.inf, np.inf])  # Parameter bounds
                    popt, _ = curve_fit(exp_decay, valid_time, valid_col_data, p0=p0, maxfev=10000, bounds=bounds)
                    fitted_curve = exp_decay(time_data_scaled, *popt)

                    # Write fitted data to output sheet; non-finite values become empty strings
                    fit_cells.value = [[val if np.isfinite(val) else ''] for val in fitted_curve]

                # A failed fit on one ROI must not abort the remaining columns
                except (RuntimeError, ValueError, TypeError) as e:
                    print(f"Fit failed for column index {col_idx} in {fp_sheets[fp]}: {e}")
                    fit_cells.clear_contents()

    except Exception as e:
        print(f" Error processing file {file_name}: {e}")
    finally:
        # Save and close workbook (partial results are kept, as before), and
        # always shut down Excel even if saving fails.
        try:
            if workbook is not None:
                workbook.save()
                workbook.close()
        finally:
            app.quit()
