In [3]:
import os

import pandas as pd

from atp_analysis import ATPAnalyzer

# --- Run configuration -------------------------------------------------------
# Inputs for this run are read from `base`; figures and CSVs are written to
# `out_dir` (a sub-folder of `base`).
base = "july25_25/"
out_dir = base + "outputs/"

# The default below is a machine-specific absolute path. Set the ATP_META_PATH
# environment variable to point somewhere else without editing this cell; when
# it is unset the original location is used unchanged.
meta_path = os.environ.get(
    "ATP_META_PATH",
    "/Volumes/Hadland_T7/Sequencing_Data/Fagradalsfjall/final_data/metadata_total.txt",
)

# --- Pipeline ----------------------------------------------------------------
# Each step is chained on the result of the previous one; order matters.
analyzer = (
    ATPAnalyzer(data_csv=base + "measurements_july25.csv", time_csv=base + "timestamps_all.csv")
      .integrate()
      .fit_standard_curve()                               # 1) fit standards FIRST (raw integrals)
      .fit_tris_drift(separate=True, split_seconds=1000)  # 2) Tris drift model (or separate=False)
      .apply_corrections_and_quantify(
            extract_vol=4.0,
            sample_vol=pd.read_csv(base + "atp_weights.csv"),  # or a scalar like 5.0
            sample_unit="g"  # labels outputs as ng per g
        )
      .compute_blank_threshold(blank_names=['Blank 2','Blank 2.1','Blank 2.2'], k=3.0)  # optional
      .merge_metadata(meta=meta_path, right_key="#SampleID")  # optional
)

# --- Figures and tables ------------------------------------------------------
analyzer.plot_tris_drift(save_path=out_dir + "tris_drift.png")
analyzer.plot_standard_curve(save_path=out_dir + "ATP_Standard_Curve.png")  # optional
analyzer.plot_vs_metadata(x="Age", y="avg_concentration", save_path=out_dir + "atp_vs_age.png")  # optional

# Writes CSVs (integrals / samples_wide / grouped / merged_meta).
# The call hands back the analyzer object; the trailing semicolon keeps its
# repr from being echoed as the cell's output.
analyzer.save_outputs(prefix=out_dir + "atp_");

<atp_analysis.ATPAnalyzer at 0x177430190>

In [None]:
# Example: End-to-end ATP analysis with Tris-as-intercept workflow
#
# What you need on disk (wide-format):
#   - data_csv:      a CSV with a "Time" column + one column per sample/standard/Tris/blank
#                    (e.g., "Time, 100, 100.1, Tris, Tris.1, SampleA, SampleA.1, Blank, ...")
#   - time_csv:      a 1-row CSV whose headers are the same sample names as columns in data_csv.
#                    Each cell is a timestamp string (e.g., "8/22/25 09:47").
#   - sample_vol CSV (optional): a CSV with columns ['Base_Sample','Sample_Volume'] to normalize
#                    back to the original sample amount (e.g., grams soil, mL water).
#   - metadata file (optional): CSV/TSV with per-sample info; must contain a key
#                    column to join with Base_Sample (e.g., "#SampleID").
#
# Notes on naming conventions:
#   • Standards are detected if the column name starts with a number (e.g., "100", "10", "0.5").
#   • Tris controls are detected if "Tris" appears in the name (case-insensitive).
#   • Replicates are grouped by stripping a trailing ".<n>" (e.g., "SampleA", "SampleA.1", "SampleA.2" → Base_Sample = "SampleA").
#   • Blanks (optional): pass your actual blank column names to compute_blank_threshold(blank_names=[...]).
#
# Scientific model used here:
#   • Fit the standard curve FIRST (slope & intercept) on raw integrals from standards.
#   • Model Tris drift over time (one or two segments).
#   • Correct each sample by subtracting the predicted Tris at that time (dynamic intercept).
#   • Convert corrected luminescence to concentration using ONLY the slope:
#         Conc_extract (ng/mL) = Corrected_Luminescence / slope
#   • Optionally back-calculate to ng per g (or per mL) of original sample using your Sample_Volume.

from atp_analysis import ATPAnalyzer
import pandas as pd

# =========================================================
# 1) Where the files live
# =========================================================
# A single base folder keeps inputs and outputs side by side.
# Point it at your own run; nested paths such as "results/2025-08-22/" work too.
base = "example_run/"
data_csv = f"{base}measurements.csv"   # wide-format luminescence time series
time_csv = f"{base}timestamps.csv"     # one row of timestamps; headers must mirror the data_csv columns

# Optional inputs -- swap in your own files as needed.
sample_vol_csv = f"{base}sample_volumes.csv"   # needs the columns Base_Sample and Sample_Volume
meta_path = f"{base}metadata.tsv"              # CSV or TSV carrying a join key (e.g., "#SampleID")

# =========================================================
# 2) Analysis settings
# =========================================================
# --- Tris drift model ---
# True  -> two linear segments, split at tris_split_seconds
# False -> one linear drift; tris_split_seconds is then ignored
use_two_segment_tris = True
tris_split_seconds = 1000   # pick a split that makes sense for your run

# --- Extraction / normalization ---
extract_vol_mL = 4.0        # mL of Tris (or diluent) used in the extraction

# sample_vol accepted by .apply_corrections_and_quantify() comes in three flavours:
#   A) None      -> results stay in extract units (ng/mL and Total_ng_in_extract)
#   B) float     -> one amount (e.g., grams soil or mL water) applied to ALL samples
#   C) DataFrame -> per-sample amounts in columns ['Base_Sample','Sample_Volume']
#
# Keep exactly one of the assignments below active:
sample_vol_option = pd.read_csv(sample_vol_csv)   # C) per-sample volumes/weights (most common)
# sample_vol_option = 5.0                         # B) every sample was 5 g (or 5 mL)
# sample_vol_option = None                        # A) extract units only

# Unit label for the normalized outputs (e.g., "g" or "mL"); only relevant
# when normalizing back to the original sample.
sample_unit_label = "g"

# --- Blanks (optional) ---
# Replace with the blank column names present in your data.
blank_names = ["Blank", "Blank.1", "Blank 2"]
k_sd = 3.0   # threshold = mean_blank + k * SD_blank

# --- Metadata merge (optional) ---
# Name of the metadata column whose values match Base_Sample; change it if
# your metadata uses another key (e.g., "Sample").
right_key_in_meta = "#SampleID"

# --- Output locations ---
# Figures (the module creates missing folders automatically):
std_curve_png = f"{base}outputs/ATP_Standard_Curve.png"
meta_plot_png = f"{base}outputs/ATP_vs_Age.png"

# Prefix prepended to every CSV that gets written (folder is created automatically):
csv_prefix = f"{base}outputs/atp_"

# =========================================================
# 3) Run the pipeline (toggle options as needed)
# =========================================================
# A split point is only meaningful for the two-segment Tris model; the
# single-segment fit is given split_seconds=None.
if use_two_segment_tris:
    tris_split = tris_split_seconds
else:
    tris_split = None

# Step order matters: standards are fitted on raw integrals before any Tris
# correction, and quantification then relies on the slope only.
analyzer = (
    ATPAnalyzer(data_csv=data_csv, time_csv=time_csv)
    .integrate()
    # 1) Standard curve FIRST, on raw integrals.
    .fit_standard_curve()
    # 2) Tris baseline drift model.
    .fit_tris_drift(separate=use_two_segment_tris, split_seconds=tris_split)
    # 3) Subtract the time-matched Tris signal, then quantify.
    #    sample_vol: None | float | DataFrame(['Base_Sample','Sample_Volume'])
    #    sample_unit: output label such as "ng per g"; only used when
    #    sample_vol is a float or a DataFrame.
    .apply_corrections_and_quantify(
        extract_vol=extract_vol_mL,
        sample_vol=sample_vol_option,
        sample_unit=sample_unit_label,
    )
    # 4) Optional: blank thresholding.
    .compute_blank_threshold(blank_names=blank_names, k=k_sd)
    # 5) Optional: join with the metadata table.
    .merge_metadata(meta=meta_path, right_key=right_key_in_meta)
)

# =========================================================
# 4) Figures (output folders are created automatically)
# =========================================================
# Standard curve. The through_origin flag selects what is drawn:
#   False -> the original linear fit including its intercept (for traceability)
#   True  -> a line through the origin (visual match for the Tris-as-intercept math)
analyzer.plot_standard_curve(
    through_origin=True,
    save_path=std_curve_png,
)

# Average concentration against one metadata column. This needs the
# merge_metadata step to have run; set x to whichever metadata column you
# want on the axis (e.g., "Age", "Depth_cm", "pH").
analyzer.plot_vs_metadata(
    x="Age",
    y="avg_concentration",
    save_path=meta_plot_png,
)

# Tris drift together with its linear fits.
analyzer.plot_tris_drift(save_path=f"{base}outputs/tris_drift.png")

# ---------------------------------------------------
# 5) CSV outputs (folder auto-created)
# ---------------------------------------------------
# Writes any tables that exist:
#   - {prefix}integrals.csv         (integral & timestamp per column)
#   - {prefix}samples_wide.csv      (aliquot-level concentrations and fields)
#   - {prefix}grouped.csv           (mean±sd per Base_Sample)
#   - {prefix}merged_meta.csv       (if metadata was merged)
#
# This is the last expression in the cell and it returns the analyzer object,
# so without the trailing semicolon the notebook would echo its repr
# (e.g. "<atp_analysis.ATPAnalyzer at 0x...>") as the cell output.
analyzer.save_outputs(prefix=csv_prefix);

# -----------------------
# Troubleshooting tips
# -----------------------
# • KeyError 'Corrected_Luminescence':
#     Ensure you're running the latest module where apply_corrections_and_quantify()
#     assigns the corrected DataFrame back to self.integrals_df BEFORE splitting.
# • No standards detected:
#     Make sure standard columns start with a number (e.g., "100", "0.5", not "Std_100").
# • Timestamps parsing:
#     time_csv must have a single row, and its headers must match the sample names in data_csv.
#     The module understands formats like "8/22/25 9:47", "2025-08-22 09:47:00", "HH:MM:SS".
# • Odd extra columns:
#     The module auto-drops columns named "Unnamed: xx" and columns that are all-NaN (except "Time").
# • Slope stability (optional diagnostic):
#     If you suspect sensitivity drift (not just baseline), split standards early/late and compare
#     through-origin slopes on corrected signals. If different, consider a piecewise slope.