In [1]:
# **Imports**
import os
import pandas as pd
from tkinter import filedialog, Tk

In [2]:
# **Function Definitions**

def crawl_files(input_folder):
    """
    Walk the input folder tree and collect every cell/puncta CSV found in it.

    Parameters:
    - input_folder (str): Path to the primary input folder.

    Returns:
    - List[str]: Paths of all files whose names end in "_cell_df.csv" or
      "_puncta_df.csv", in the order os.walk visits them.
    """
    # str.endswith accepts a tuple, so both suffixes are tested in one call.
    wanted_suffixes = ("_cell_df.csv", "_puncta_df.csv")
    return [
        os.path.join(directory, name)
        for directory, _subdirs, names in os.walk(input_folder)
        for name in names
        if name.endswith(wanted_suffixes)
    ]

def process_cell_df(df, conversion_factor):
    """
    Update specific columns in *_cell_df.csv based on the user-provided conversion factor.

    The existing micron-area statistics are divided by the factor stored in
    'image_resolution_um_per_px_sq' and multiplied by the new factor. The stored
    factor is then replaced, and 'cell_micron_area' is (re)computed from 'area'.
    All validation happens before any column is written, so a rejected frame is
    left untouched.

    Parameters:
    - df (pd.DataFrame): DataFrame containing the cell data. It is modified in place.
    - conversion_factor (float): User-provided pixel-to-micron squared conversion factor.

    Returns:
    - pd.DataFrame: The same DataFrame object, updated.

    Raises:
    - KeyError: If a required column is missing (the first missing one is reported).
    - ValueError: If the stored conversion factor contains zeros or missing values.
    """
    # Ensure required columns exist
    required_columns = ['puncta_micron_area_mean', 'puncta_micron_area_std',
                        'image_resolution_um_per_px_sq', 'area']
    for col in required_columns:
        if col not in df.columns:
            raise KeyError(f"Missing column: {col}")

    # The old factor is needed to back-calculate pixel values from the micron areas
    old_conversion_factor = df['image_resolution_um_per_px_sq']

    # A missing old factor would turn the rescaled statistics into NaN without any
    # error; reject it so the caller can skip the file instead of overwriting data.
    if old_conversion_factor.isna().any():
        raise ValueError("Old conversion factor contains missing value(s), cannot back-calculate pixel areas.")

    # Avoid division by zero
    if (old_conversion_factor == 0).any():
        raise ValueError("Old conversion factor contains zero(s), cannot divide by zero.")

    # Rescale the puncta area statistics: micron -> pixel (old factor) -> micron (new factor)
    for stat_column in ('puncta_micron_area_mean', 'puncta_micron_area_std'):
        df[stat_column] = (df[stat_column] / old_conversion_factor) * conversion_factor

    # Replace 'image_resolution_um_per_px_sq' with the user-provided conversion factor
    df['image_resolution_um_per_px_sq'] = conversion_factor

    # Calculate 'cell_micron_area' from 'area' and the conversion factor
    df['cell_micron_area'] = df['area'] * conversion_factor

    return df

def process_puncta_df(df, conversion_factor):
    """
    Update the 'micron area' column in *_puncta_df.csv based on the user-provided conversion factor.

    Parameters:
    - df (pd.DataFrame): DataFrame containing the puncta data. It is modified in place.
    - conversion_factor (float): User-provided pixel-to-micron squared conversion factor.

    Returns:
    - pd.DataFrame: The same DataFrame object, updated.

    Raises:
    - KeyError: If 'micron area' or 'area' is missing (the first missing one is reported).
    """
    # Ensure required columns exist. 'micron area' is required even though it is
    # overwritten below, so frames lacking it are rejected rather than extended.
    required_columns = ['micron area', 'area']
    for col in required_columns:
        if col not in df.columns:
            raise KeyError(f"Missing column: {col}")

    # Calculate 'micron area' from 'area' and the conversion factor
    df['micron area'] = df['area'] * conversion_factor

    return df

def process_files(files, conversion_factor):
    """
    Rewrite each listed CSV in place with its micron-area columns recomputed.

    Every file is read, routed to the cell or puncta updater according to its
    suffix, and written back to the same path. A failure on one file is printed
    and recorded; the remaining files are still processed.

    Parameters:
    - files (List[str]): List of file paths to process.
    - conversion_factor (float): User-provided pixel-to-micron squared conversion factor.

    Returns:
    - Tuple[int, List[Tuple[str, str]]]: Number of successfully processed files and a list of
      (path, error message) pairs for the files that were skipped.
    """
    failures = []
    n_ok = 0

    for path in files:
        try:
            frame = pd.read_csv(path)

            if path.endswith("_cell_df.csv"):
                updated = process_cell_df(frame, conversion_factor)
            elif path.endswith("_puncta_df.csv"):
                updated = process_puncta_df(frame, conversion_factor)
            else:
                # Not expected for paths produced by the crawler; kept as a safety net
                raise ValueError("File does not match expected patterns.")

            # Overwrite the source file with the updated table
            updated.to_csv(path, index=False)
        except Exception as err:
            failures.append((path, str(err)))
            print(f"Error processing file {path}: {err}")
        else:
            # Counted only once the file has been written back successfully
            n_ok += 1

    return n_ok, failures

In [3]:
# Acquisition parameters that feed the conversion-factor formula in the next cell.
# NOTE(review): the meanings below are inferred from the variable names — confirm
# against the imaging setup before reusing these values.
pixel_resolution = 512  # presumably the image width in pixels before upscaling
micron_resolution = 50  # presumably the same field-of-view width in microns
upscale_factor = 2  # multiplies pixel_resolution in the conversion-factor formula

In [4]:
# Square of (micron_resolution divided by the upscaled pixel count); this value is
# what gets passed to main() as the conversion factor.
microns_per_pixel_sq = (micron_resolution / (pixel_resolution*upscale_factor)) ** 2

In [5]:
microns_per_pixel_sq

0.002384185791015625

In [6]:
#microns_per_pixel_sq = 0.00238

In [None]:
def main(conversion_factor):
    """
    Main function to handle folder selection, file crawling, processing, and summarizing results.

    Parameters:
    - conversion_factor (float): Pixel-to-micron squared conversion factor applied to every
      matching file under the selected folder.

    Raises:
    - ValueError: If conversion_factor is not a positive, finite number. The check runs before
      any dialog is shown or file is touched, because the files are overwritten in place.
    """
    import math  # local import so this cell does not depend on a new top-level import

    # Reject factors that would overwrite every file with zero, negative, or NaN areas
    if not math.isfinite(conversion_factor) or conversion_factor <= 0:
        raise ValueError("Conversion factor must be a positive number.")

    # Hide the main Tkinter window
    root = Tk()
    root.withdraw()

    # **User Interaction**

    try:
        # Prompt user to select the primary input folder
        print("Please select the primary input folder containing the .csv files.")
        input_folder = filedialog.askdirectory(title="Select Input Folder")
    finally:
        # Release the hidden root window; without this every call leaves a Tk instance alive
        root.destroy()

    if not input_folder:
        print("No folder selected. Exiting the script.")
        return

    # **File Crawling and Processing**

    print("\nCrawling through the selected folder to find relevant .csv files...")
    files_to_process = crawl_files(input_folder)
    total_files = len(files_to_process)
    print(f"Found {total_files} file(s) to process.")

    if total_files == 0:
        print("No files matched the specified patterns. Exiting the script.")
        return

    print("\nProcessing files...")
    processed_count, skipped_files = process_files(files_to_process, conversion_factor)

    # **Output Summary**

    print("\n--- Processing Complete ---")
    print(f"Total files found: {total_files}")
    print(f"Successfully processed: {processed_count}")
    print(f"Skipped files: {len(skipped_files)}")

    if skipped_files:
        print("\nDetails of skipped files:")
        for file, error in skipped_files:
            print(f"- {file}: {error}")


In [8]:
main(microns_per_pixel_sq)

Please select the primary input folder containing the .csv files.


2024-11-19 10:20:30.373 python[41292:10847320] +[IMKClient subclass]: chose IMKClient_Legacy
2024-11-19 10:20:30.768 python[41292:10847320] The class 'NSOpenPanel' overrides the method identifier.  This method is implemented by class 'NSWindow'



Crawling through the selected folder to find relevant .csv files...
Found 90 file(s) to process.

Processing files...
Error processing file /Users/christian.neureuter/Library/CloudStorage/Box-Box/Banerjee_Lab_Shared_Folder/Meet/FUS-FL-GFP-Analysis/FUS-FL-(31st Oct 2024)/IM-6-FUS-FL-GFP_puncta_df.csv: 'Missing column: micron area'
Error processing file /Users/christian.neureuter/Library/CloudStorage/Box-Box/Banerjee_Lab_Shared_Folder/Meet/FUS-FL-GFP-Analysis/FUS-FL-(31st Oct 2024)/IM-6-FUS-FL-GFP_cell_df.csv: 'Missing column: puncta_micron_area_mean'

--- Processing Complete ---
Total files found: 90
Successfully processed: 88
Skipped files: 2

Details of skipped files:
- /Users/christian.neureuter/Library/CloudStorage/Box-Box/Banerjee_Lab_Shared_Folder/Meet/FUS-FL-GFP-Analysis/FUS-FL-(31st Oct 2024)/IM-6-FUS-FL-GFP_puncta_df.csv: 'Missing column: micron area'
- /Users/christian.neureuter/Library/CloudStorage/Box-Box/Banerjee_Lab_Shared_Folder/Meet/FUS-FL-GFP-Analysis/FUS-FL-(31st Oct 

In [None]:
def main():
    """
    Main function to handle user interaction, file crawling, processing, and summarizing results.

    The user picks the input folder in a dialog and types the conversion factor at the
    prompt; the prompt repeats until a positive, finite number is entered. Every matching
    file under the folder is then rewritten in place and a summary is printed.
    """
    import math  # local import so this cell does not depend on a new top-level import

    # Hide the main Tkinter window
    root = Tk()
    root.withdraw()

    # **User Interaction**

    try:
        # Prompt user to select the primary input folder
        print("Please select the primary input folder containing the .csv files.")
        input_folder = filedialog.askdirectory(title="Select Input Folder")
    finally:
        # Release the hidden root window; without this every call leaves a Tk instance alive
        root.destroy()

    if not input_folder:
        print("No folder selected. Exiting the script.")
        return

    # Prompt user to input the pixel-to-micron squared conversion factor
    while True:
        try:
            user_input = input("Enter the pixel-to-micron squared conversion factor (e.g., 0.25): ")
            conversion_factor = float(user_input)
            # float() accepts "nan" and "inf", and `nan <= 0` is False, so a plain sign
            # check would let them through and the files would be overwritten with them.
            if not math.isfinite(conversion_factor) or conversion_factor <= 0:
                raise ValueError("Conversion factor must be a positive, finite number.")
            break
        except ValueError as ve:
            print(f"Invalid input: {ve}. Please try again.")

    # **File Crawling and Processing**

    print("\nCrawling through the selected folder to find relevant .csv files...")
    files_to_process = crawl_files(input_folder)
    total_files = len(files_to_process)
    print(f"Found {total_files} file(s) to process.")

    if total_files == 0:
        print("No files matched the specified patterns. Exiting the script.")
        return

    print("\nProcessing files...")
    processed_count, skipped_files = process_files(files_to_process, conversion_factor)

    # **Output Summary**

    print("\n--- Processing Complete ---")
    print(f"Total files found: {total_files}")
    print(f"Successfully processed: {processed_count}")
    print(f"Skipped files: {len(skipped_files)}")

    if skipped_files:
        print("\nDetails of skipped files:")
        for file, error in skipped_files:
            print(f"- {file}: {error}")

# **Run the Main Function**
# Start the interactive workflow only when this code runs as the top-level program,
# not when it is imported as a module.

if __name__ == "__main__":
    main()
