In [83]:
import pandas as pd
from scipy.signal import savgol_filter
import numpy as np
import os 
import re
import math
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv

In [84]:
def add_exp_labels(df, exp):
    df["exp"] = [exp] * len(df)

    exp_labels = {
        "M4576_s2": "10mM Pulses #3",
        "M4581_s1": "10mM Pulses #1",
        "M4584_s1": "10mM Pulses #2",
        "M6881_s2": "1mM Pulses #1",
        "M6881_s5": "0.1mM Pulses #1",
        "M6881_s6": "0.01mM Pulses #1",
        # autotht
        "M6813_s1": "0.01mM Cont. #1",
        "M6813_s4": "0.01mM Cont. #2",
        "M6605_s3": "10mM Pulses #1",
        "M6605_s4": "10mM Pulses #2",
        "M6605_s8": "10mM Pulses #3"
    }

    df["Exp_Label"] = df["exp"].map(exp_labels)
    return df

In [85]:
def add_delta_intensity(df):
  df = df.copy()
  df["Delta_ThT"] = df["Intensity_ThT"] - df["Initial_Intensity_ThT"]
  return df

In [86]:
def plot_styling(df, column, cmap_name="tab20", num_samples=20):
    unique_vals = df[column].unique()
    unique_vals = list(unique_vals)[:num_samples]
    cmap = plt.get_cmap(cmap_name, len(unique_vals))
    color_dict = {val: cmap(i) for i, val in enumerate(unique_vals)}

    return color_dict


In [87]:
def extract_features(df):
    # This pattern now correctly handles column names like 'Mean1.0' and 'Perim.1.0'
    # It looks for (text_with_dots)(numbers)(.some_other_numbers_at_the_end)
    pattern = re.compile(r"([A-Za-z_.]+)(\d+)\.\d+$")

    feature_map = {}
    for col in df.columns:
        match = pattern.match(col)
        if match:
            prefix, idx = match.groups()
            # Clean up the prefix by removing any trailing dot (like from 'Perim.')
            prefix = prefix.strip('.')
            if idx not in feature_map:
                feature_map[idx] = {}
            feature_map[idx][prefix] = col

    dfs = []

    if not feature_map:
        print("WARNING: extract_features did not find any matching columns.")
        print("Please check the regex pattern against your CSV's column headers.")
        return pd.DataFrame() # Return an empty dataframe to avoid crashing

    for idx, feature_cols in feature_map.items():
        sub_cols = list(feature_cols.values())
        sub_df = df[sub_cols].copy()

        rename_dict = {v: k for k, v in feature_cols.items()}
        sub_df.rename(columns=rename_dict, inplace=True)

        for col in ["Mean", "X", "Y"]:
            if col not in sub_df.columns:
                sub_df[col] = None

        sub_df["Track"] = idx
        sub_df["Frame"] = range(len(df)) 

        dfs.append(sub_df)

    df_long = pd.concat(dfs, ignore_index=True)
    df_long.rename(columns={"Mean": "Intensity"}, inplace=True)

    return df_long

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def add_germination_time(df, feature = "DerivSavgol_Intensity", threshold=-8):
    spore_data = df.groupby("Track_ID")
    phase_df = []
    skip_ids = []
    for spore_id, spore in spore_data:
        spore = spore.sort_values("Frame").copy()

        drop_frames = spore[spore[feature] < threshold]

        germ_frame = drop_frames["Frame"].min() if not drop_frames.empty else None
        spore["Germination_Index"] = [germ_frame] * len(spore)
        # assign germination status
        if germ_frame is not None:
            spore["Status"] = (spore["Frame"] >= germ_frame).astype(int)
        else:
            skip_ids.append(spore_id)
            spore["Status"] = 0

        phase_df.append(spore)
    df = pd.concat(phase_df, ignore_index=True)
    return df[~df["Track_ID"].isin(skip_ids)]


In [89]:
def add_derivative_savgol(df, feature, track_column, window_length=7, poly_order=3):
    df = df.copy()
    deriv_col = f"DerivSavgol_Intensity"
    results = []

    spore_data = df.groupby([track_column])
    for spore_id, spore in spore_data:
        spore = spore.sort_values("Frame").copy()
        x = spore["Frame"].values
        y = spore[feature].values.astype(float)


        dt = np.median(np.diff(x))
        dy_dx = savgol_filter(y, window_length, poly_order, deriv=1, delta=dt)

        spore[deriv_col] = dy_dx
        results.append(spore)

    return pd.concat(results, ignore_index=True)

In [90]:
def extract_xy(df):
  pos_dict = {}
  spore_data = df.groupby(["Track_ID"])
  for track, spore in spore_data:
    avg_x = spore["X"].mean()
    avg_y = spore["Y"].mean()
    track_id = spore["Track_ID"].values[0]
    pos_dict[track_id] = (avg_x, avg_y)
  return pos_dict

In [91]:
def match_ids(dict1, dict2, max_distance=5):
    matches = []
    used_tht_ids = set()  

    for id1, (x1, y1) in dict1.items():
        closest_id = None
        min_dist = float('inf')

        for id2, (x2, y2) in dict2.items():
            if id2 in used_tht_ids:
                continue  # skip if already matched

            dist = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
            if dist < min_dist and dist <= max_distance:
                min_dist = dist
                closest_id = id2

        if closest_id is not None:
            matches.append((id1, closest_id))
            used_tht_ids.add(closest_id)

    print(f"matched {len(matches)} tracks...")
    return matches


def merge_matched_tracks(phc_df, tht_df, matched_pairs, phc_suffix="PhC", tht_suffix="ThT"):
    merged_list = []
    for phc_id, tht_id in matched_pairs:
        phc_track = phc_df[phc_df["Track"] == phc_id].copy()
        tht_track = tht_df[tht_df["Track"] == tht_id].copy()

        phc_track = phc_track.add_suffix(f"_{phc_suffix}")
        tht_track = tht_track.add_suffix(f"_{tht_suffix}")

        phc_track = phc_track.rename(columns={f"Frame_{phc_suffix}": "Frame"})
        tht_track = tht_track.rename(columns={f"Frame_{tht_suffix}": "Frame"})

        merged = pd.merge(phc_track, tht_track, on="Frame", how="right")
        #merged["MATCHED_PAIR"] = f"{phc_id}_{tht_id}"
        merged_list.append(merged)

    return pd.concat(merged_list, ignore_index=True) if merged_list else pd.DataFrame()


In [92]:
def calculate_interval_germ_exposures(initial_min, between_min, minutes_between_frames, exp_length):
   i = initial_min/minutes_between_frames - 1
   exposures = [i]
   while i < exp_length:
      i += between_min/minutes_between_frames 
      exposures.append(i)
   if exposures[-1] < exp_length:
      exposures = exposures[:-1]
   return exposures
def add_germ_exposures_todf(df, germ_exposures_list, concentration):
    df["Germinant"] = df["Frame"].apply(lambda f: 1 if f in germ_exposures_list else 0)
    df["Germinant"] = df["Germinant"].apply(lambda f: f * concentration)
    return df

In [93]:
def plot_intensity(df, color_dict, feature = "Intensity_ThT", alpha = 1, linestyle = "-", num_samples = None):
  germ_col = "Status_PhC"
  spore_data = df.groupby("Track_PhC")
  if num_samples != None:
      samples = 0
  for track_id, data in spore_data:
      exp = data["exp"].iloc[0]  # Add this
      color = color_dict.get(track_id, "gray")

      dormant_data = data[data[germ_col] == 0]
      germinated_data = data[data[germ_col] == 1]

      sns.lineplot(x="Frame", y=feature, data=dormant_data,
                     linewidth=2, color = color, alpha = alpha, label = feature.replace("_", " "), linestyle = linestyle)
        #sns.lineplot(x = "FRAME", y = feature, data = data, alpha = alpha, color = color)
      if not germinated_data.empty:
            sns.lineplot(x="Frame", y=feature, data=germinated_data,
                         linewidth=6, color = color, alpha = alpha, linestyle = linestyle)
      if samples > num_samples - 1: 
          return 
      samples += 1


In [94]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def manually_filter_spores(base, df, feature, display_time=1, filter = 1):

    # Grab experiment name safely
    exp = df["exp"].iloc[0] if "exp" in df.columns else "unknown"
    
    # Set up ignore file path
    ignore_file = os.path.join(base, f"{exp}_FilteredPhCTracks.csv")

    # Load ignored spores
    if os.path.exists(ignore_file):
        ignored_spores = set(map(str, pd.read_csv(ignore_file)["Track_PhC"]))
    else:
        ignored_spores = set()

    spores_to_keep = []
    spores_to_ignore = []
    if filter == 0: 
        print("skipping filtering...")
        print(f'keeping {df["Track_PhC"][~df["Track_PhC"].isin(ignored_spores)].nunique()} spores...')
        return df[~df["Track_PhC"].isin(ignored_spores)]
    spore_data = df.groupby("Track_PhC")
    for spore_id, data in spore_data:

        if spore_id in ignored_spores:
            print(f"found")
            continue

        # Plot
        fig, ax = plt.subplots(figsize=(3, 1))
        sns.lineplot(data=data, x="Frame", y=feature, ax=ax)
        ax.axvline(data["Germination_Index_PhC"].values[0], color='red', linestyle='--')
        plt.show(block=False)
        plt.pause(display_time)
        plt.close(fig)

        keep = input(f"Keep spore {spore_id}? (y/n/a): ").strip().lower()
        if keep not in ["a", "y", "n"]:
            print("Invalid input... showing again...")
            keep = "a"

        if keep == "a":
            fig, ax = plt.subplots(figsize=(3, 1))
            sns.lineplot(data=data, x="Frame", y=feature, ax=ax)
            ax.axvline(data["Germination_Index_PhC"].values[0], color='red', linestyle='--')
            plt.show(block=False)
            plt.pause(display_time)
            plt.close(fig)
            keep = input(f"Keep spore {spore_id}? (y/n): ").strip().lower()

        if keep == "y":
            spores_to_keep.append(spore_id)
        elif keep == "n":
            spores_to_ignore.append(spore_id)

    # Save ignored spores
    if spores_to_ignore:
        new_ignored = pd.DataFrame(spores_to_ignore, columns=["Track_PhC"])
        if os.path.exists(ignore_file):
            existing = pd.read_csv(ignore_file)
            updated = pd.concat([existing, new_ignored], ignore_index=True).drop_duplicates()
        else:
            updated = new_ignored
        updated.to_csv(ignore_file, index=False)

    print(f"Keeping {len(spores_to_keep)} spores...")

    # Return filtered DataFrame
    return df[df["Track_PhC"].isin(spores_to_keep)]


In [95]:
def add_rolling_derivative(df, feature, track_column, window=5):
    df = df.copy()
    deriv_col = f"Derivative_{feature}"
    results = []

    spore_data = df.groupby(["Exp_Label", track_column])
    for (exp_label, spore_id), spore in spore_data:
        spore = spore.sort_values("Frame").copy()
        rolling_diff = spore[feature].diff().rolling(window=window, min_periods=1).mean()
        spore[deriv_col] = rolling_diff.fillna(0)
        results.append(spore)

    return pd.concat(results, ignore_index=True)
def add_rolling_mean(df, feature, track_column, window=5):
    df = df.copy()
    smoothed_col = f"Rolling_{feature}"
    result = []

    spore_data = df.groupby(["Exp_Label", track_column])
    for spore_id, spore in spore_data:
        spore = spore.sort_values("Frame").copy()
        spore[smoothed_col] = spore[feature].rolling(window=window, min_periods=1).mean()
        result.append(spore)

    return pd.concat(result, ignore_index=True)

In [96]:
def add_initial_feature(df, feature, track_column, num_init = 11):
  df = df.copy()
  df = df.sort_values("Frame")

  init_col_name = f"Initial_{feature}"
  column_values = []

  spore_data = df.groupby(["Exp_Label", track_column])
  for spore_id, spore in spore_data:
        feature_vals = spore[feature].values.astype(float)
        initial_feature = np.mean(feature_vals[:num_init])

        spore[init_col_name] = [initial_feature] * len(spore)
        column_values.append(spore)

  df_return = pd.concat(column_values, ignore_index = True)
  return df_return 

def add_cumsum_feature(df, feature, track_column = "Track_PhC"):#df, feature, track_column="TRACK_ID"):

    df = df.sort_values([track_column, "Frame"]).copy()
    cumsum_col = f"CumuSum_{feature}"
    df[cumsum_col] = df.groupby(track_column)[feature].cumsum()
    return df

In [97]:
def plot_phase(df, feature = "DerivSavgol_Intensity_PhC", frame_col = "Frame", intensity_col = "Intensity_PhC"):
  for spore_id, spore_data in df.groupby("Track_PhC"):
    #plt.axhline(-8, color = "lightgrey", label = "Threshold")
    germinated_df = spore_data[spore_data["Status_PhC"] == 1]
    dormant_df = spore_data[spore_data["Status_PhC"] == 0]
    sns.lineplot(x = frame_col, y = intensity_col, data = spore_data, color = "tab:blue", linewidth = 3, label = "Phase Intensity", alpha = 1)

    sns.lineplot(x = frame_col, y = feature, data = spore_data, color = "tab:orange", linewidth = 3, label = f"Savitzky-Golay Derivative")
    #sns.lineplot(x = frame_col, y = feature, data = spore_data, color = "tab:orange", linewidth = 1, alpha = 0.5)
    #sns.lineplot(x = frame_col, y = feature, data = germinated_df, color ="orange", linewidth = 5)
    plt.axvline(spore_data["Germination_Index_PhC"].values[0], label = "Threshold Met", linewidth = 3, color = "darkgrey", alpha = 0.7)

    #sns.lineplot(x = frame_col, y = intensity_col, data = germinated_df, color = "tab:blue", linewidth = 5, label = "PhC: phase-dark")
    plt.xlabel("Frame", fontsize = 14)
    plt.ylabel("")
    plt.xticks(fontsize = 10)
    plt.yticks(fontsize = 10)
    plt.legend(fontsize = 12)
    plt.show()


In [98]:
# --- Configuration ---
load_dotenv()
# Load the main experiment config from the environment
DATA_ROOT = os.getenv("DATA_ROOT")
EXPERIMENT = os.getenv("EXPERIMENT_NAME")

if not all([DATA_ROOT, EXPERIMENT]):
    raise ValueError("DATA_ROOT or EXPERIMENT_NAME not set in your .env file.")

# This variable is for labeling the output file, you can change it as needed
EXP_LABEL = "Pulses_ThT_Analysis" 

# --- Static variables for this script's specific purpose ---
# This script's job is to merge PhC and a fluorescent channel.
MICR_PHC = "PH"
MICR_FLUOR = "ThT"

BASE = Path(DATA_ROOT)

# --- Path Setup ---
# CORRECTED: Added "data" and "processed" to the path to match your structure
fiji_base = BASE / "data" / "processed" / f"{EXPERIMENT}_Fiji"
data_folder = fiji_base / "Processed_Data"

# Create the output directory if it doesn't exist to prevent errors on save.
data_folder.mkdir(parents=True, exist_ok=True)

# --- Data Loading and Initial Processing ---
# Initialize dataframes as None. We will check later if they were successfully loaded.
phc_df = None
tht_df = None

print(f"Searching for results files in: {fiji_base}")
try:
    for csv_filename in os.listdir(fiji_base):
        if "_Results.csv" not in csv_filename:
            print(f"Skipping non-results file: {csv_filename}")
            continue  # Skip any file that isn't a results file

        # Full path to the current CSV file
        input_csv_path = os.path.join(fiji_base, csv_filename)
        
        if f"_{MICR_PHC}_" in csv_filename:
            print(f"-> Found 'PHC' data, loading...")
            phc_df = pd.read_csv(input_csv_path)
        elif f"_{MICR_FLUOR}_" in csv_filename:
            print(f"-> Found 'ThT' data, loading...")
            tht_df = pd.read_csv(input_csv_path)

except FileNotFoundError:
    print(f"---")
    print(f"ERROR: The directory was not found.")
    print(f"Please verify this path exists: {fiji_base}")
    phc_df, tht_df = None, None # Ensure dataframes are None so merge step is skipped


# --- Conditional Merging and Analysis ---
# This block only runs if BOTH phc_df and tht_df were successfully loaded in the step above.
if phc_df is not None and tht_df is not None:
    print("\nSUCCESS: Both PhC and ThT data found. Merging and analyzing...")

    # Match spores between the two channels based on XY coordinates
    phc_xy = extract_xy(phc_df)
    tht_xy = extract_xy(tht_df)
    matches = match_ids(phc_xy, tht_xy)
    
    # Merge the dataframes into a single master dataframe 'df'
    df = merge_matched_tracks(phc_df, tht_df, matches, phc_suffix="PhC", tht_suffix="ThT")
    df = add_exp_labels(df, EXPERIMENT)

    # --- Feature Engineering and Analysis on Merged Data ---
    # Process PhC data
    df = add_derivative_savgol(df, "Intensity_PhC", "Track_PhC", window_length=7, poly_order=3)
    df = add_germination_time(df, feature="DerivSavgol_Intensity_PhC")
    df = add_initial_feature(df, "Intensity_PhC", "Track_PhC")
    
    # Process ThT data
    feature_tht = f"Intensity_{MICR_FLUOR}"
    df = add_rolling_mean(df, feature_tht, "Track_PhC")
    df = add_rolling_derivative(df, feature_tht, "Track_PhC")
    df = add_initial_feature(df, feature_tht, "Track_PhC")
    df = add_cumsum_feature(df, feature_tht, "Track_PhC")
    df = add_delta_intensity(df)

    # Plot the results
    plot_phase(df)

    # Save the final, merged, and processed data
    final_output_path = os.path.join(data_folder, f"{EXP}_Matched_And_Processed_Data.csv")
    df.to_csv(final_output_path, index=False)
    print(f"\nANALYSIS COMPLETE. Final merged data saved to:\n{final_output_path}")

else:
    print("---")
    print(f"SKIPPING MERGE STEP: Could not load both PhC and {MICR_FLUOR} data.")
    print("Please ensure both '_PhC_Results.csv' and '_ThT_Results.csv' files are present.")
    
    if phc_df is not None:
        print("However, PhC data was processed successfully.")
        
        # You could add PhC-only analysis or plotting here if you wish.
        
        # Define an output path and save the processed PhC dataframe
        phc_only_output_path = os.path.join(data_folder, f"{EXP}_PhC_Processed_Data.csv")
        phc_df.to_csv(phc_only_output_path, index=False)
        
        print(f"-> Saved processed PhC-only data to:\n{phc_only_output_path}")
    else: 
        print("No PhC data found. Please check your input files.")

Searching for results files in: /Users/mratcliff/Documents/GitHub/AroProject/data/processed/Leticia_M4576_s2_Fiji
Skipping non-results file: .DS_Store
-> Found 'ThT' data, loading...
Skipping non-results file: Processed_Data
-> Found 'PHC' data, loading...

SUCCESS: Both PhC and ThT data found. Merging and analyzing...
matched 1131 tracks...


KeyError: 'Track'