# Flow & Pressure Time-Series Analysis
This notebook analyzes semicolon-delimited CSV data files and, for each file, produces a combined Flow-vs-Time and Pressure-vs-Time plot with sinusoidal fits overlaid.

In [None]:
# ── imports ────────────────────────────────────────────────────────────────────
import os, math, glob
import numpy as np
import pandas as pd
from typing import Tuple

import scipy.stats as stats
from scipy.optimize import curve_fit
from plotnine import (
    ggplot, aes, geom_point, geom_line, labs, ggtitle, theme_bw, scale_y_continuous,
    annotate, theme, element_rect, scale_color_manual, guides, guide_legend, theme_classic,
    element_text
)

In [None]:
# ── user‑tunable columns ───────────────────────────────────────────────────────
# Maps the logical trace names used throughout this notebook to the column
# headers found in the CSV files.
COL_NAMES = {
    "TIME"   : "Time",                      # common x‑axis (s)
    # NOTE(review): FLOW and PRESS1 currently name the same CSV column, so the
    # "flow" trace is identical to pressure channel #2 — confirm the flow header.
    "FLOW"   : "MFCS-EZ (884) #2",          # flow rate in µL/min
    "PRESS1" : "MFCS-EZ (884) #2",          # pressure in mbar
    "PRESS2" : "MFCS-EZ (884) #3",          # pressure in mbar
    # Derived column (PRESS1 − PRESS2) that the main loop adds to each
    # DataFrame; build_dual_axis_plot reads it through this key, so it is
    # declared here rather than only being injected at run time.
    "PRESS"  : "CombinedPressure",
}

# ── file system setup ──────────────────────────────────────────────────────────
# Every input/output location is keyed by the acquisition-date folder name.
DATE = "10-24-2025"

DATA_DIR = f"../data/{DATE}"          # CSV input, one sub-folder per chip
GRAPH_DIR = f"../graphs/{DATE}"       # rendered plots
ANALYSIS_DIR = f"../analysis/{DATE}"  # analysis output location

# Make sure both output folders exist before anything is written to them.
for _out_dir in (GRAPH_DIR, ANALYSIS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
# ── helpers ────────────────────────────────────────────────────────────────────
def _sinusoid(t, A, f, phi, C):
    """
    Evaluate the sine model A·sin(2π·f·t + φ) + C at time(s) t.

    A is the amplitude, f the frequency, phi the phase offset and C the
    constant offset. Works element-wise when t is a NumPy array.
    """
    phase = 2 * np.pi * f * t + phi
    return A * np.sin(phase) + C

def fit_sine(t, y):
    """
    Fit a sinusoidal curve A·sin(2π·f·t + φ) + C to time-series data.

    Samples where either t or y is NaN or ±inf are discarded before fitting,
    so the returned curve has one value per *valid* sample and can be shorter
    than the input.

    Parameters:
        t (array-like): Time values.
        y (array-like): Data values, same length as t.

    Returns:
        fit_line (np.ndarray): Fitted sine curve evaluated at the valid time
            values. Empty when there is no valid sample; a flat line at the
            mean of y when there are too few samples to fit; the initial-guess
            curve when the optimiser does not converge.
        eqn (str): Equation of the fitted sine wave, or a short message
            describing why no fit was produced.
    """
    # Convert before masking so plain lists work as well as arrays / Series.
    t = np.asarray(t, dtype=float)
    y = np.asarray(y, dtype=float)

    # Drop non-finite samples and keep the two arrays aligned
    valid = np.isfinite(t) & np.isfinite(y)
    t, y = t[valid], y[valid]

    if y.size == 0:
        return np.zeros_like(t), "No valid data"

    guess_C = y.mean()
    span = t.max() - t.min()

    # The model has four free parameters, so curve_fit needs at least four
    # samples; a zero time span would also make the frequency guess undefined.
    if y.size < 4 or span <= 0:
        return np.full_like(y, guess_C), "Insufficient data for sine fit"

    # Initial guesses
    guess_A = (y.max() - y.min()) / 2
    guess_f = 1 / span  # fallback: 1 period over time range

    # Estimate frequency using FFT for a better initial guess. The mean
    # spacing is only usable as a sample period when it is positive.
    dt = np.mean(np.diff(t))
    if dt > 0:
        freqs = np.fft.fftfreq(t.size, d=dt)
        fft_magnitude = np.abs(np.fft.fft(y - guess_C))
        # Ignore the zero-frequency component (DC)
        peak_f = abs(freqs[np.argmax(fft_magnitude[1:]) + 1])
        if peak_f > 0:  # also False for NaN, which keeps the fallback
            guess_f = peak_f

    guess_phi = 0
    p0 = [guess_A, guess_f, guess_phi, guess_C]

    try:
        popt, _ = curve_fit(_sinusoid, t, y, p0=p0, maxfev=10000)
    except RuntimeError:
        # If fitting fails, return the initial guess curve
        fit_line = _sinusoid(t, *p0)
        eqn = f"Fit failed — using guess: {guess_A:.2f}·sin(2π·{guess_f:.3f}·t+{guess_phi:.2f})+{guess_C:.2f}"
        return fit_line, eqn

    A, f, phi, C = popt
    fit_line = _sinusoid(t, *popt)
    eqn = f"{A:.2f}·sin(2π·{f:.3f}·t+{phi:.2f})+{C:.2f}"

    return fit_line, eqn


def build_dual_axis_plot(df: pd.DataFrame, fname: str) -> None:
    """
    Plot flow and pressure against time with their sinusoidal fits overlaid,
    and save the figure as ``{GRAPH_DIR}/{fname}.png``.

    Both traces are drawn on one shared y-axis; pressure is plotted in its
    own units and is not rescaled. The fit equations returned by fit_sine are
    shown in the figure caption.

    Parameters:
        df (pd.DataFrame): Must contain the columns named by
            COL_NAMES["TIME"], COL_NAMES["FLOW"] and COL_NAMES["PRESS"].
        fname (str): Output path relative to GRAPH_DIR, without extension.
            It may include a sub-folder (the caller is expected to have
            created it).
    """
    cols = [COL_NAMES["TIME"], COL_NAMES["FLOW"], COL_NAMES["PRESS"]]

    # Keep only rows where all three values are finite. fit_sine drops invalid
    # samples itself, so filtering jointly up-front guarantees both fits have
    # the same length as the time axis they are plotted against.
    values = df[cols].to_numpy(dtype=float)
    values = values[np.isfinite(values).all(axis=1)]
    t, flow, press = values.T

    # Sinusoidal fits
    flow_fit,  flow_eq  = fit_sine(t, flow)
    press_fit, press_eq = fit_sine(t, press)

    dplot = pd.DataFrame({
        "t"           : t,
        "Flow"        : flow,
        "Flow-fit"    : flow_fit,
        "Pressure"    : press,
        "Pressure-fit": press_fit,
    })

    p = (
        ggplot(dplot, aes("t"))
        + geom_point(aes(y="Flow", color="'Flow Data'"), size=0.8)
        + geom_line (aes(y="Flow-fit", color="'Flow Fit'"), linetype="dashed")
        + geom_point(aes(y="Pressure", color="'Pressure Data'"), size=0.8)
        + geom_line (aes(y="Pressure-fit", color="'Pressure Fit'"), linetype="dashed")
        + scale_color_manual(
            values={
                "Flow Data": "#1fa3b4",
                "Flow Fit": "#08519c",
                "Pressure Data": "#d62728",
                "Pressure Fit": "#7f0000"
            }
        )
        + labs(
            x="Time (s)",
            # Pressure is plotted unscaled, so the label must not say "scaled".
            y="Flow rate (µL/min) / Pressure (mbar)",
            title="Flow & Pressure vs Time",
            color="Data Series",
            caption=f"Flow fit: {flow_eq}    Pressure fit: {press_eq}"
        )
        + guides(
            color=guide_legend(
                title="Legend"
            )
        )
        + theme_bw()
        + theme(
            plot_caption=element_text(size=8, color="gray", ha="center")
        )
    )

    p.save(f"{GRAPH_DIR}/{fname}.png", width=12, height=6, dpi=400)

In [None]:
# ── main loop ──────────────────────────────────────────────────────────────────
# Walks every chip sub-folder of DATA_DIR, cleans each CSV into a
# (time, flow, combined pressure) table, and saves one plot per file under
# GRAPH_DIR/<chip_folder>/graph_<n>.png.
import os

all_groups = []  # for optional ANOVA reuse

# Name of the derived PRESS1 − PRESS2 column; build_dual_axis_plot looks it up
# through COL_NAMES["PRESS"]. Set once here instead of on every iteration.
COL_NAMES["PRESS"] = "CombinedPressure"

time_col  = COL_NAMES["TIME"]
flow_col  = COL_NAMES["FLOW"]
press_col = COL_NAMES["PRESS"]

# Raw CSV columns that must be numeric. FLOW and PRESS1 may name the same
# column, so de-duplicate while keeping order.
raw_numeric_cols = list(dict.fromkeys(
    [COL_NAMES["FLOW"], COL_NAMES["PRESS1"], COL_NAMES["PRESS2"]]
))

for chip_folder in sorted(os.listdir(DATA_DIR)):
    chip_in_dir = f"{DATA_DIR}/{chip_folder}"
    if not os.path.isdir(chip_in_dir):
        continue  # stray files would otherwise get an empty output folder

    csvs = sorted(glob.glob(f"{chip_in_dir}/*.csv"))
    chip_out_dir = f"{GRAPH_DIR}/{chip_folder}"
    os.makedirs(chip_out_dir, exist_ok=True)

    for i, csv_path in enumerate(csvs, 1):
        # Load the CSV data
        df = pd.read_csv(csv_path, delimiter=";")

        # Parse time column as datetime; unparseable entries become NaT
        timestamps = pd.to_datetime(df[time_col], errors="coerce")
        valid_timestamps = timestamps.dropna()
        if valid_timestamps.empty:
            print(f"File: {csv_path}, skipped (no parseable timestamps)")
            continue

        # Convert to elapsed seconds relative to the first *valid* timestamp,
        # so a bad first row cannot turn the whole column into NaN
        df[time_col] = (timestamps - valid_timestamps.iloc[0]).dt.total_seconds()

        # Convert flow + pressure to numeric BEFORE combining, so the
        # subtraction below never operates on raw strings
        for col in raw_numeric_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Combine the two pressure columns into a single bipolar signal
        df[press_col] = df[COL_NAMES["PRESS1"]] - df[COL_NAMES["PRESS2"]]

        # Drop any rows that are missing after conversion
        df = df[[time_col, flow_col, press_col]].dropna()
        if df.empty:
            print(f"File: {csv_path}, skipped (no valid rows)")
            continue

        print(f"File: {csv_path}, Shape: {df.shape}")
        print(df.head())

        # build & save plot
        build_dual_axis_plot(df, f"{chip_folder}/graph_{i}")

        all_groups.append(df[flow_col].values)

print("Analysis complete. Plots saved to:", GRAPH_DIR)