# Count peaks per file

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
import os

def analyze_calcium_data(filename):
    """Count ΔF/F peaks for every ROI column of a tab-separated trace file.

    The first line of the file is skipped and the remaining rows are read
    without a header, so columns are labelled by integer position: column 0
    supplies the frame values and every further column is treated as one
    ROI.  Each ROI trace that can be analysed is plotted with its detected
    peaks marked in red.

    Args:
        filename: Path of the tab-delimited text file to analyse.

    Returns:
        dict mapping each ROI column label to its number of detected peaks.
        ROIs whose ΔF/F trace is empty or contains NaN/Inf values are
        recorded with 0 peaks and are not plotted.
    """
    # Local import: this cell's import block does not bring numpy into scope.
    import numpy as np

    # Read the data from the text file
    data = pd.read_csv(filename, delimiter='\t', header=None, skiprows=1)

    # Every column after the first one is an ROI
    roi_columns = list(data.columns[1:])

    # Peak count per ROI
    roi_peaks = {}

    for roi in roi_columns:
        # Drop rows where the ROI (or frame) value is missing and renumber
        # the rows from 0 so that positions and index labels coincide.
        roi_data = data[[0, roi]].dropna().reset_index(drop=True)
        frames = roi_data[0]
        intensity = roi_data[roi]

        # ΔF/F relative to the mean intensity of the whole trace
        baseline = intensity.mean()
        dff = (intensity - baseline) / baseline

        # An empty column or a zero baseline produces an empty or NaN/Inf
        # trace.  Such values cannot be used as y-axis limits further down,
        # so the ROI is counted as having no peaks instead of aborting the
        # whole file.
        if dff.empty or not np.isfinite(dff).all():
            print(f"Skipping ROI {roi}: ΔF/F trace is empty or contains NaN/Inf values.")
            roi_peaks[roi] = 0
            continue

        # Find peaks using the saw shape pattern criteria
        peaks, _ = find_peaks(dff, distance=50, prominence=0.1, width=50)

        # Plot the ΔF/F trace with the detected peaks (wide figure)
        plt.figure(figsize=(12, 6))
        plt.plot(frames, dff, color='black')
        # find_peaks returns positions, so select them positionally
        plt.plot(frames.iloc[peaks], dff.iloc[peaks], 'ro')
        plt.xlabel('Frames')
        plt.ylabel('▲F/F')
        plt.title(f'ROI: {roi}')

        # Manually set y-axis limits to ensure all data points are visible
        plt.ylim([dff.min() - 0.1, dff.max() + 0.1])

        plt.show()

        # Store peak information for the current ROI
        roi_peaks[roi] = len(peaks)

        # Close the current figure to prevent memory issues
        plt.close()

    return roi_peaks

# Helpers that summarise the per-ROI peak counts and save them to Excel


def generate_data_frames(roi_peaks):
    """Build the summary table and the per-ROI table from peak counts.

    Args:
        roi_peaks: dict mapping an ROI label to its number of detected peaks.

    Returns:
        Tuple ``(df1, df2)``.  ``df1`` is a one-row DataFrame with the total
        number of peaks, the number of active cells (ROIs with at least one
        peak) and the average number of peaks per active cell (0 when there
        is no active cell).  ``df2`` lists each active ROI with its count.
    """
    # An ROI counts as active as soon as it has at least one peak
    active_counts = {roi: count for roi, count in roi_peaks.items() if count > 0}
    n_active = len(active_counts)

    # The total is taken over every ROI; inactive ones contribute zero
    peak_sum = sum(roi_peaks.values())

    if n_active > 0:
        mean_per_active = peak_sum / n_active
    else:
        mean_per_active = 0

    # DataFrame 1: one summary row for the whole file
    summary = pd.DataFrame({
        'Total Peaks': [peak_sum],
        'Active Cells': [n_active],
        'Average Peak per Active Cell': [mean_per_active],
    })

    # DataFrame 2: one row per active ROI
    per_roi = pd.DataFrame({
        'ROI': list(active_counts.keys()),
        'Peak Count': list(active_counts.values()),
    })

    return summary, per_roi

# Save the summary and per-ROI DataFrames to an Excel workbook
def save_data_frames_to_excel(filename, df1, df2):
    """Write ``df1`` and ``df2`` to an .xlsx workbook next to ``filename``.

    The workbook is placed in the same directory as ``filename`` and has the
    same base name with the extension replaced by ``.xlsx``.  ``df1`` goes to
    the sheet 'DataFrame1' and ``df2`` to the sheet 'DataFrame2', both
    without the index column.

    Args:
        filename: Path of the analysed input file; only used to derive the
            output path.
        df1: DataFrame written to the sheet 'DataFrame1'.
        df2: DataFrame written to the sheet 'DataFrame2'.
    """
    # Split the input filename to get the directory and base name
    directory, base_name = os.path.split(filename)

    # Same name as the input file, with an .xlsx extension
    output_filename = os.path.join(directory, f"{os.path.splitext(base_name)[0]}.xlsx")

    # The context manager saves and closes the workbook on exit, also when a
    # write fails; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
        df1.to_excel(writer, sheet_name='DataFrame1', index=False)
        df2.to_excel(writer, sheet_name='DataFrame2', index=False)

# Single-file run: analyse one trace file, summarise it, export it to Excel
# and print both tables.
# Change the path here to the file you want to investigate:
filename = 'H:/Calcium imaging/20230811/extracted_frames_total/Roi_files_stardist/Results/WT_ptz_1min_1_MM.txt'
roi_peaks = analyze_calcium_data(filename)
df1, df2 = generate_data_frames(roi_peaks)

# Save the DataFrames to an Excel file (same folder and base name as the input)
save_data_frames_to_excel(filename, df1, df2)

print("DataFrame 1:")
print(df1)
print("\nDataFrame 2:")
print(df2)
# Report the output path: the input path with its extension replaced by .xlsx
print(f"Data saved to {os.path.splitext(filename)[0]}.xlsx")

# Combine all .txt files in a folder to analyse a whole experiment at once

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
import os
import numpy as np

def analyze_calcium_data(filename):
    """Count ΔF/F peaks per ROI in one tab-separated trace file.

    The first line of the file is skipped and the rest is read without a
    header, so columns are labelled by integer position: column 0 supplies
    the frame values, every further column is one ROI.  ROIs with at least
    one detected peak are plotted and recorded; ROIs whose ΔF/F trace
    contains NaN/Inf values, or in which no peak is found, are reported on
    stdout and left out of the result.

    Args:
        filename: Path of the tab-delimited text file to analyse.

    Returns:
        Tuple ``(roi_peaks, total_cells)``: a dict mapping each recorded ROI
        label to its peak count, and the number of ROI columns in the file
        (skipped ROIs included).
    """
    table = pd.read_csv(filename, delimiter='\t', header=None, skiprows=1)

    # Everything after the frame column is an ROI
    roi_columns = list(table.columns[1:])
    peak_counts = {}

    for roi in roi_columns:
        # Keep only rows where this ROI has a value and renumber them from 0
        trace = table[[0, roi]].dropna().reset_index(drop=True)
        frames, intensity = trace[0], trace[roi]

        # ΔF/F relative to the mean intensity of the trace
        mean_intensity = intensity.mean()
        dff = (intensity - mean_intensity) / mean_intensity

        # Guard against unusable traces (per the original author's note,
        # these showed up after computer crashes)
        if np.isnan(dff).any() or np.isinf(dff).any():
            print(f"Skipping ROI {roi} due to NaN or Inf values.")
            continue

        # Find peaks using the saw shape pattern criteria
        peaks, _ = find_peaks(dff, height=0.01, distance=25, width=25)
        n_peaks = len(peaks)

        # ROIs without any peak are neither plotted nor recorded
        if n_peaks == 0:
            print(f"No valid peaks found for ROI {roi}. Skipping.") 
            continue

        # Trace in black, detected peaks as red circles
        plt.figure(figsize=(10, 6))
        plt.plot(frames, dff, color='black')
        plt.plot(frames[peaks], dff[peaks], 'ro')
        plt.xlabel('Frames')
        plt.ylabel('▲F/F')

        # Title shows the bare file name (no directory) next to the ROI label
        plt.title(f'ROI: {roi} (File: {os.path.basename(filename)})')

        # Pad the y-axis so that every data point stays visible
        plt.ylim([dff.min() - 0.1, dff.max() + 0.1])

        plt.show()
        peak_counts[roi] = n_peaks

        # Release the figure to keep memory use bounded
        plt.close()

    # Every ROI column counts as one cell, whether or not it was skipped
    return peak_counts, len(roi_columns)

def generate_data_frames(roi_peaks, source_file, total_cells):
    """Build the per-file summary table and the per-ROI table.

    An ROI is treated as an active cell when it has more than one peak
    (i.e. at least two).

    Args:
        roi_peaks: dict mapping an ROI label to its number of detected peaks.
        source_file: Value stored in the 'Source File' column of both tables.
        total_cells: Total number of cells (ROIs) in the file; denominator of
            the active-cell percentage.

    Returns:
        Tuple ``(df1, df2)``.  ``df1`` is a one-row summary (total peaks,
        active cells, peaks in active cells, average peaks per active cell,
        % active cells, total cells, source file); the average and the
        percentage are 0 when their denominator is not positive.  ``df2``
        lists every ROI in ``roi_peaks`` with its peak count and source file.
    """
    rois = list(roi_peaks.keys())
    counts = list(roi_peaks.values())

    # Peak counts of the active cells only (more than one peak)
    active_counts = [count for count in counts if count > 1]
    n_active = len(active_counts)
    peaks_in_active = sum(active_counts)

    # Average over active cells only; 0 when there is none
    if n_active > 0:
        mean_per_active = peaks_in_active / n_active
    else:
        mean_per_active = 0

    # Share of active cells among all cells, in percent
    if total_cells > 0:
        active_share = (n_active / total_cells) * 100
    else:
        active_share = 0

    # DataFrame 1: one summary row for the file
    summary = pd.DataFrame({
        'Total Peaks': [sum(counts)],
        'Active Cells': [n_active],
        'Total Peaks in Active Cells': [peaks_in_active],
        'Average Peak per Active Cell': [mean_per_active],
        '% Active Cells': [active_share],
        'Total Cells': [total_cells],
        'Source File': [source_file],
    })

    # DataFrame 2: one row per recorded ROI
    per_roi = pd.DataFrame({
        'ROI': rois,
        'Peak Count': counts,
        'Source File': [source_file] * len(rois),
    })

    return summary, per_roi


def process_folder(folder_path):
    """Analyse every ``.txt`` file in a folder and save one combined workbook.

    Each file is passed to ``analyze_calcium_data`` and
    ``generate_data_frames``; the resulting tables are printed per file,
    concatenated over all files and written to ``Combined_results.xlsx``
    inside ``folder_path`` (sheets 'DataFrame1' and 'DataFrame2').

    Args:
        folder_path: Directory containing the tab-delimited ``.txt`` files.

    Returns:
        None.  When the folder contains no ``.txt`` file a message is printed
        and no workbook is written.
    """
    # Per-file tables, collected separately for the two sheets
    summary_frames = []
    per_roi_frames = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the combined workbook reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue

        file_path = os.path.join(folder_path, filename)
        print(f"Processing file: {file_path}")

        # Analyze the calcium data for the current file
        roi_peaks, total_cells = analyze_calcium_data(file_path)

        # The bare file name (not the full path) is stored as the source
        df1, df2 = generate_data_frames(roi_peaks, filename, total_cells)
        summary_frames.append(df1)
        per_roi_frames.append(df2)

        # Print the dataframes for the current file
        print("DataFrame 1:")
        print(df1)
        print("\nDataFrame 2:")
        print(df2)

    # pd.concat raises ValueError on an empty list, so stop early when
    # nothing was processed.
    if not summary_frames:
        print(f"No .txt files found in {folder_path}; nothing to save.")
        return

    # Concatenate all dataframes into one
    final_df1 = pd.concat(summary_frames, ignore_index=True)
    final_df2 = pd.concat(per_roi_frames, ignore_index=True)

    # Save the concatenated dataframes to a single Excel file
    output_filename = os.path.join(folder_path, "Combined_results.xlsx")
    with pd.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
        final_df1.to_excel(writer, sheet_name='DataFrame1', index=False)
        final_df2.to_excel(writer, sheet_name='DataFrame2', index=False)

    print(f"Data saved to {output_filename}")

# Whole-folder run: every .txt file in the folder is analysed and the
# combined tables are written to Combined_results.xlsx in that same folder.
# Change the path here to the folder you want to investigate:
folder_path = 'H:/Calcium imaging/20231123/Results'
process_folder(folder_path)


## Replace \ with /

In [1]:
# Paste a Windows path here; the raw string keeps the backslashes literal
input_string = r"H:\Calcium imaging\20231123\Results"

# Replace backslashes with forward slashes
output_string = input_string.replace("\\", "/")

# Print the converted path so it can be copied into the cells above
print(output_string)

H:/Calcium imaging/20231123/Results
