In [1]:
pwd

'C:\\Users\\shuai'

In [2]:
cd C:\Python_temp

C:\Python_temp


In [3]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks, peak_widths, savgol_filter
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [4]:
# Preprocess the data
def preprocess_data(data, time_column, fluorescence_columns,
                    total_duration=900, window_size=120,
                    savgol_window=11, savgol_polyorder=3):
    """Rescale the time axis and return baseline-corrected, smoothed traces.

    Every fluorescence column is turned into a relative change
    ``(F - F0) / F0``, where ``F0`` is a centered rolling median of that
    column, and is then smoothed with a Savitzky-Golay filter.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the time column and the fluorescence columns.
    time_column : str
        Name of the column used as the time axis.
    fluorescence_columns : list of str
        Names of the columns to correct and smooth.
    total_duration : float, default 900
        Value that the largest entry of the time column is rescaled to
        (presumably the recording length in seconds -- TODO confirm).
    window_size : int, default 120
        Number of samples in the centered rolling-median baseline window.
    savgol_window : int, default 11
        ``window_length`` passed to ``savgol_filter``. Each column must have
        at least this many samples.
    savgol_polyorder : int, default 3
        ``polyorder`` passed to ``savgol_filter``.

    Returns
    -------
    time : pandas.Series
        The time column multiplied by ``total_duration / max(time column)``.
    smoothed_data : pandas.DataFrame
        One smoothed relative-change trace per fluorescence column.
    """
    time = data[time_column] * (total_duration / data[time_column].max())
    fluorescence_data = data[fluorescence_columns]

    def _relative_to_baseline(trace):
        # Compute the rolling median once and reuse it as both the offset and
        # the normaliser. A baseline of exactly 0 yields inf/NaN here.
        baseline = trace.rolling(window=window_size, min_periods=1, center=True).median()
        return (trace - baseline) / baseline

    # Rolling baseline correction
    rolling_baseline_corrected = fluorescence_data.apply(_relative_to_baseline, axis=0)

    # Apply Savitzky-Golay filter for smoothing
    smoothed_data = rolling_baseline_corrected.apply(
        lambda x: savgol_filter(x, window_length=savgol_window, polyorder=savgol_polyorder),
        axis=0
    )

    return time, smoothed_data

In [5]:
# Enhanced Peak Detection for Oscillation Analysis
def enhanced_peak_detection(time, smoothed_data, prominence=0.05, height=0.05, distance=1,
                            width_range=(1, 30), recording_duration=900):
    """Detect peaks in every column and describe each width-valid peak.

    Peaks are found with ``scipy.signal.find_peaks`` and kept only when their
    width at half prominence, converted to time units, lies inside
    ``width_range`` (inclusive on both ends).

    Parameters
    ----------
    time : pandas.Series
        Time axis, indexed positionally. The sampling interval is taken from
        its first two entries, so uniform sampling is assumed.
    smoothed_data : pandas.DataFrame
        One trace per column; the column name is reported as ``Region``.
    prominence, height, distance
        Forwarded to ``find_peaks``. ``prominence`` and ``height`` must not be
        ``None`` because the per-peak prominences and heights are read back
        from the returned properties.
    width_range : tuple, default (1, 30)
        ``(min, max)`` accepted peak width, in the units of ``time``.
    recording_duration : float, default 900
        Duration used to normalise the peak count into ``Frequency``. The
        default matches the 900 that ``preprocess_data`` rescales the time
        axis to.

    Returns
    -------
    pandas.DataFrame
        One row per valid peak. A column without valid peaks contributes a
        single all-zero row with ``Peak_Number`` 0. ``Frequency`` is
        ``valid peak count / recording_duration * 1000`` (mHz if the duration
        is in seconds). ``Inter_Peak_Interval`` is the time since the previous
        *valid* peak of the same column, or 0 for the first one.
        ``Rise_Time`` and ``Decay_Time`` are measured from and to the
        half-prominence crossings, truncated to whole samples.
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # Loop-invariant sampling interval used to convert sample counts to time.
    sampling_interval = time.iloc[1] - time.iloc[0] if len(time) > 1 else 0.0

    peak_details = []
    for column in smoothed_data.columns:
        signal = smoothed_data[column]

        # Find peaks with adjustable parameters
        peaks, properties = find_peaks(
            signal,
            prominence=prominence,
            height=height,
            distance=distance
        )

        # Calculate peak widths at half-prominence
        widths, _, left_ips, right_ips = peak_widths(signal, peaks, rel_height=0.5)
        widths_in_time = widths * sampling_interval

        valid_peaks = [
            i for i, width in enumerate(widths_in_time)
            if width_range[0] <= width <= width_range[1]
        ]

        frequency = len(valid_peaks) / recording_duration * 1000

        if not valid_peaks:
            # If no valid peaks are detected, fill with 0
            peak_details.append({
                "Region": column,
                "Time": 0,
                "Amplitude": 0,
                "Width": 0,
                "Prominence": 0,
                "Inter_Peak_Interval": 0,
                "Rise_Time": 0,
                "Decay_Time": 0,
                "AUC": 0,
                "Frequency": frequency,
                "Peak_Number": 0
            })
            continue

        previous_peak_time = None
        for i in valid_peaks:
            peak_idx = peaks[i]
            peak_time = time.iloc[peak_idx]

            # Measure against the previous *valid* peak, not the previous
            # detected one, so intervals match the peaks actually reported.
            inter_peak_interval = 0 if previous_peak_time is None else peak_time - previous_peak_time
            previous_peak_time = peak_time

            # Half-prominence crossings, truncated to whole sample indices
            left_base_idx = int(left_ips[i])
            right_base_idx = int(right_ips[i])

            # Calculate Rise Time and Decay Time
            rise_time = peak_time - time.iloc[left_base_idx]
            decay_time = time.iloc[right_base_idx] - peak_time

            # Calculate AUC between the two crossings (inclusive)
            auc = integrate(signal.iloc[left_base_idx:right_base_idx + 1],
                            time.iloc[left_base_idx:right_base_idx + 1])

            peak_details.append({
                "Region": column,
                "Time": peak_time,
                "Amplitude": properties["peak_heights"][i],
                "Width": widths_in_time[i],
                "Prominence": properties["prominences"][i],
                "Inter_Peak_Interval": inter_peak_interval,
                "Rise_Time": rise_time,
                "Decay_Time": decay_time,
                "AUC": auc,
                "Frequency": frequency,
                "Peak_Number": 1
            })

    return pd.DataFrame(peak_details)

In [6]:
def export_peak_data(
    peak_df,
    csv_filename,  # <-- required to prefix outputs
    path='.',
    aggregated_filename=None,
    detailed_filename=None
):
    """Write the per-peak table and a per-region summary to CSV files.

    Parameters
    ----------
    peak_df : pandas.DataFrame
        Per-peak table with a ``Region`` column and the metric columns
        aggregated below. It is not modified; missing values are replaced
        by 0 in a copy before exporting.
    csv_filename : str
        Name or path of the input file; its base name without extension is
        used as the prefix of the default output names.
    path : str, default '.'
        Output directory; created if it does not exist.
    aggregated_filename : str, optional
        File name of the per-region summary. Defaults to
        ``<base>_aggregated_data.csv``.
    detailed_filename : str, optional
        File name of the per-peak table. Defaults to
        ``<base>_detailed_peak_data.csv``.

    Returns
    -------
    None
        The two output paths are printed.
    """
    os.makedirs(path, exist_ok=True)

    # Fill a copy rather than using inplace=True, so the caller's DataFrame
    # is left exactly as it was passed in.
    peak_df = peak_df.fillna(0)

    # Extract base name from input file (e.g., "GBML198_raw.csv" → "GBML198_raw")
    base = os.path.splitext(os.path.basename(csv_filename))[0]

    # Generate default filenames with CSV prefix
    if detailed_filename is None:
        detailed_filename = f"{base}_detailed_peak_data.csv"
    if aggregated_filename is None:
        aggregated_filename = f"{base}_aggregated_data.csv"

    # Full paths
    detailed_path = os.path.join(path, detailed_filename)
    aggregated_path = os.path.join(path, aggregated_filename)

    # Export detailed data
    peak_df.to_csv(detailed_path, index=False)

    # Aggregate per Region: metrics are averaged, peak counts are summed
    aggregated_df = peak_df.groupby('Region').agg({
        'Amplitude': 'mean',
        'Width': 'mean',
        'Prominence': 'mean',
        'Frequency': 'mean',
        'AUC': 'mean',
        'Inter_Peak_Interval': 'mean',
        'Rise_Time': 'mean',
        'Decay_Time': 'mean',
        'Peak_Number': 'sum'
    }).reset_index()

    # Export aggregated
    aggregated_df.to_csv(aggregated_path, index=False)

    print(f"Detailed peak data saved to: {detailed_path}")
    print(f"Aggregated peak data saved to: {aggregated_path}")

In [41]:
# Load the raw traces; every column other than 'Time' is treated as one
# fluorescence region and passed through the preprocessing step.
csv_file = "dmsoResults15min.csv"
data = pd.read_csv(csv_file)
print(data.columns)
fluorescence_columns = data.columns[data.columns != 'Time'].tolist()
time, smoothed_data = preprocess_data(
    data,
    time_column='Time',
    fluorescence_columns=fluorescence_columns,
)

Index(['Time', 'Mean1', 'Mean2', 'Mean3', 'Mean4', 'Mean5', 'Mean6', 'Mean7',
       'Mean8', 'Mean9',
       ...
       'Mean144', 'Mean145', 'Mean146', 'Mean147', 'Mean148', 'Mean149',
       'Mean150', 'Mean151', 'Mean152', 'Mean153'],
      dtype='object', length=154)


In [42]:
# Detect peaks in every region. prominence, height and distance repeat the
# function defaults; only width_range is widened from (1, 30) to (1, 60).
peak_df = enhanced_peak_detection(time, smoothed_data, prominence=0.05, height=0.05, distance=1, width_range=(1, 60))

In [43]:
# Preview the first rows of the per-peak table as plain text.
print(peak_df.head())

  Region   Time  Amplitude      Width  Prominence  Inter_Peak_Interval  \
0  Mean1  864.0   0.664046  14.046477    0.725435                  0.0   
1  Mean2  327.0   0.198667  12.012516    0.205264                  0.0   
2  Mean3  279.0   0.317836   9.086748    0.326888                  0.0   
3  Mean4    0.0   0.000000   0.000000    0.000000                  0.0   
4  Mean5  864.0   0.083772  13.195030    0.084704                  0.0   

   Rise_Time  Decay_Time       AUC  Frequency  Peak_Number  
0        7.5         7.5  7.469615   1.111111            1  
1        6.0         6.0  1.901648   1.111111            1  
2        6.0         3.0  2.212049   1.111111            1  
3        0.0         0.0  0.000000   0.000000            0  
4        7.5         6.0  0.880020   1.111111            1  


In [44]:
# Write the detailed and per-region aggregated CSVs to the default output
# directory ('.'), using the input file's base name as the filename prefix.
export_peak_data(peak_df,csv_filename=csv_file)

Detailed peak data saved to: .\dmsoResults15min_detailed_peak_data.csv
Aggregated peak data saved to: .\dmsoResults15min_aggregated_data.csv
