# Data Import

In [1]:
# Import libraries
import os
import pandas as pd

In [None]:
date = "04-17-2025"
data_dict = {}

In [3]:
graph_str     = "../graphs/" + date + "/"
directory_str = "../data/" + date + "/"

for dir in os.listdir(directory_str):
    data_dict[dir] = []

    tmp_graph_str     = graph_str + dir + "/"
    tmp_directory_str = directory_str + dir + "/"

    if os.path.isdir(tmp_graph_str) == False:
        os.makedirs(tmp_graph_str)

    for file in os.listdir(tmp_directory_str):
        data_dict[dir].append(pd.read_csv(tmp_directory_str + file, delimiter=";"))

# Data Cleaning

In [4]:
X_COL = "Flow EZ #2 (12806) - Setpoint"
Y_COL = "Flow Unit #1 [Flow EZ #1 (11411)]"

filtered_dict = {}

In [5]:
for key in data_dict.keys():
    filtered_dict[key] = []

    for df in data_dict[key]:
        filtered_dict[key].append(df[[X_COL, Y_COL]])

# Functions

In [6]:
import numpy as np

from typing import Tuple
from scipy.stats import linregress
from scipy.optimize import curve_fit

In [7]:
def find_log_fit(df: pd.DataFrame) -> Tuple[str, float]:
    """
    Use scipy to find the logarithmic fit and fitting parameters
    """

    log_x = np.log(df[X_COL])
    y = df[Y_COL]

    a_log, b_log, r_log, _, _ = linregress(log_x, y)

    df['log_fit'] = a_log * log_x + b_log

    y_true     = df[Y_COL]
    y_pred_log = df['log_fit']
    ss_res_log = np.sum((y_true - y_pred_log) ** 2)
    ss_tot_log = np.sum((y_true - np.mean(y_true)) ** 2)
    r2_log = 1 - (ss_res_log / ss_tot_log)

    if not np.isnan(a_log):
        log_eq = (
            f"Log: y = {a_log:.3f} ln(x) + {b_log:.3f}\n"
            f"R² = {r2_log:.3f}"
        )
    else:
        df['log_fit'] = np.nan
        log_eq = "Logarithmic fit failed"

    return log_eq

In [8]:
def find_logistic_fit(df: pd.DataFrame) -> Tuple[str, float]:
    """
    Use scipy to find the logistic fit and fitting parameters
    """

    # Logistic function definition
    def logistic(x, L, k, x0):
        return L / (1 + np.exp(-k * (x - x0)))
    
    # Initial parameter guess: [max y, slope, midpoint]
    initial_guess = [df[Y_COL].max(), 1, df[X_COL].median()]

    # Fit the logistic model
    try:
        params, _ = curve_fit(logistic, df[X_COL], df[Y_COL], p0=initial_guess, maxfev=10000)
        L, k, x0_log = params
        df['logistic_fit'] = logistic(df[X_COL], L, k, x0_log)
        logistic_eq = f"Logistic: y = {L:.2f} / (1 + e^(-{k:.2f}(x - {x0_log:.2f})))"

        # Calculate R²
        y_true = df[Y_COL]
        y_pred = df['logistic_fit']
        ss_res = np.sum((y_true - y_pred) ** 2)
        ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
        r2_logistic = 1 - (ss_res / ss_tot)

        logistic_eq = (
            f"Logistic: y = {L:.2f} / (1 + e^(-{k:.2f}(x - {x0_log:.2f})))\n"
            f"R² = {r2_logistic:.3f}"
        )
    except RuntimeError:
        df['logistic_fit'] = np.nan
        logistic_eq = "Logistic fit failed"

    return logistic_eq

In [9]:
def find_lin_fit(df: pd.DataFrame) -> Tuple[str, float]:
    """
    Use scipy to find the linear fit and fitting parameters
    """

    a_lin, b_lin, r_lin, _, _ = linregress(df[X_COL], df[Y_COL])

    df['lin_fit'] = a_lin * df[X_COL] + b_lin

    y_true     = df[Y_COL]
    y_pred_lin = df['lin_fit']
    ss_res_lin = np.sum((y_true - y_pred_lin) ** 2)
    ss_tot_lin = np.sum((y_true - np.mean(y_true)) ** 2)
    r2_lin = 1 - (ss_res_lin / ss_tot_lin)

    lin_eq = (
        f"Log: y = {a_lin:.3f} ln(x) + {b_lin:.3f}\n"
        f"R² = {r2_lin:.3f}"
    )

    return lin_eq

In [10]:
def melt_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Melt data for legend support
    """

    df_melt = pd.melt(
        df,
        id_vars=[X_COL, Y_COL],
        value_vars=['log_fit', 'lin_fit', 'logistic_fit'],
        var_name='Fit Type',
        value_name='Fit Value'
    )

    # Rename for legend
    fit_labels = {'log_fit': 'Logarithmic Fit', 'lin_fit': 'Linear Fit', 'logistic_fit': 'Logistic Fit'}
    df_melt['Fit Type'] = df_melt['Fit Type'].map(fit_labels)

    return df_melt

# Data Analysis

In [11]:
from plotnine import (
    ggplot,
    aes,
    geom_point,
    geom_line,
    ggtitle,
    labs,
    annotate,
    xlim,
    ylim,
    scale_color_manual,
)

In [12]:
X_LABEL = "Pressure In (mb)"
Y_LABEL = "Flow Rate Out (µL/min)"

In [13]:
for key in filtered_dict.keys():
    df = pd.DataFrame()
    chip_list = filtered_dict[key]

    for i, df in enumerate(chip_list):
        # Find applicable fits
        log_eq      = find_log_fit(df)
        lin_eq      = find_lin_fit(df)
        logistic_eq = find_logistic_fit(df)

        # Melt the data
        df_melt = melt_data(df)

        # Make the plot
        plot =  (
            ggplot(df_melt, aes(X_COL, Y_COL)) +
            geom_point() +
            geom_line(
                aes(
                    y='Fit Value',
                    color='Fit Type'
                )
            ) +
            scale_color_manual(
                values = {
                    'Logarithmic Fit': 'blue',
                    'Linear Fit': 'red',
                    'Logistic Fit': 'green'
                }
            ) +
            ggtitle(f"{Y_LABEL} vs {X_LABEL}") +
            labs(
                x=X_LABEL,
                y=Y_LABEL,
                color="Model Fit"
            ) +
            xlim(0, None) +
            ylim(0, 400) +
            annotate(
                "text",
                x=0,
                y=400,
                label=logistic_eq,
                ha='left',
                va='top',
                size=8,
                color='green'
            ) +
            annotate(
                "text",
                x=0,
                y=350,
                label=log_eq,
                ha='left',
                va='top',
                size=8,
                color='blue'
            ) +
            annotate(
                "text",
                x=0,
                y=300,
                label=lin_eq,
                ha='left',
                va='top',
                size=8,
                color='red'
            )
        )

        # Save the plot
        plot.save(graph_str + f"{key}/graph_{i}.png", width=10, height=6, dpi=300)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user