In [8]:
import os
import glob
import csv
import numpy as np
import pandas as pd

# Every file in this directory that matches the pattern is included in the sum.
directory_selected_pdes = "/global/cfs/cdirs/dune/users/wermelinger/FSD/PDE_study/pde_files_v7/"
pde_file_pattern = "*.csv"

# Sorted so that the "first file" used as the ID reference is deterministic.
search_glob = os.path.join(directory_selected_pdes, pde_file_pattern)
file_list_pdes = sorted(glob.glob(search_glob))

if not file_list_pdes:
    raise RuntimeError(f"No CSV files found in: {directory_selected_pdes}")

# -----------------------------
# 1) First file defines the reference ID columns (values and row order)
# -----------------------------
# sep=None lets the python engine sniff the delimiter.
df0 = pd.read_csv(file_list_pdes[0], sep=None, engine="python")

id_cols = ["global_trap_id", "tpc", "adc", "ch_list"]
pe_cols = ["PE_meas", "PE_meas_noLTcr", "PE_exp", "PE_exp_noLTcr"]

# Report the first required column (ID columns first, then PE columns)
# that the reference file does not provide.
missing = [c for c in id_cols + pe_cols if c not in df0.columns]
if missing:
    raise KeyError(f"Column '{missing[0]}' not found in {file_list_pdes[0]}")

# ch_list is normalised to a whitespace-stripped string so that it compares
# cleanly against the same column of the other files.
ids_df = df0[id_cols].assign(
    ch_list=lambda frame: frame["ch_list"].astype(str).str.strip()
)

nrows = len(ids_df)

# One float64 accumulator per PE column, in the same order as pe_cols.
sum_PE_meas, sum_PE_meas_noLTcr, sum_PE_exp, sum_PE_exp_noLTcr = (
    np.zeros(nrows, dtype=np.float64) for _ in pe_cols
)

# Helper: check that ID columns match across files (recommended)
def assert_same_ids(df_ids, ref_ids, filename):
    """Raise ValueError unless ``df_ids`` matches ``ref_ids`` row for row.

    Both arguments are DataFrames holding the ID columns. Their indexes are
    ignored (each is reset before comparing); row count, row order and cell
    contents must all agree as judged by ``DataFrame.equals``.

    Parameters
    ----------
    df_ids : DataFrame with the ID columns of the file being checked.
    ref_ids : DataFrame with the reference ID columns.
    filename : name used in the error message to identify the offending file.

    Raises
    ------
    ValueError
        If the row counts differ, or if the frames are not equal.
    """
    n_got, n_ref = len(df_ids), len(ref_ids)
    if n_got != n_ref:
        raise ValueError(f"Row count mismatch in {filename}: {n_got} vs {n_ref}")

    # Drop the original indexes so only position and content are compared.
    candidate = df_ids.reset_index(drop=True)
    reference = ref_ids.reset_index(drop=True)
    if candidate.equals(reference):
        return

    raise ValueError(f"ID columns/order differ in {filename} (global_trap_id/tpc/adc/ch_list).")

# -----------------------------
# 2) Loop all files: accumulate PE columns
# -----------------------------
# Add the PE columns of every file (including the first one, which is read
# again here) into the running-sum arrays, after checking that the file has
# the required columns and the same ID rows as the reference.
for fpath in file_list_pdes:
    df = pd.read_csv(fpath, sep=None, engine="python")

    # Check the required columns per file so a malformed file is reported by
    # name, with the same KeyError message used for the reference file.
    for c in id_cols + pe_cols:
        if c not in df.columns:
            raise KeyError(f"Column '{c}' not found in {fpath}")

    # validate + align: ch_list gets the same string/strip normalisation as
    # the reference before the strict comparison.
    df_ids = df[id_cols].copy()
    df_ids["ch_list"] = df_ids["ch_list"].astype(str).str.strip()
    assert_same_ids(df_ids, ids_df, fpath)

    # Non-numeric or missing PE entries become NaN via errors="coerce" and
    # are then counted as 0.0 in the sums.
    pe = df[pe_cols].copy()
    for c in pe_cols:
        pe[c] = pd.to_numeric(pe[c], errors="coerce").fillna(0.0)

    # In-place adds: the arrays are read later when the output is written.
    sum_PE_meas        += pe["PE_meas"].to_numpy(dtype=np.float64)
    sum_PE_meas_noLTcr += pe["PE_meas_noLTcr"].to_numpy(dtype=np.float64)
    sum_PE_exp         += pe["PE_exp"].to_numpy(dtype=np.float64)
    sum_PE_exp_noLTcr  += pe["PE_exp_noLTcr"].to_numpy(dtype=np.float64)

# -----------------------------
# 3) Write output with csv.writer
# -----------------------------
output_file = "pde_sum_all_files.csv"

# Header row: the four ID columns followed by the four summed PE columns.
titles = [
    "global_trap_id", "tpc", "adc", "ch_list",
    "PE_meas", "PE_meas_noLTcr", "PE_exp", "PE_exp_noLTcr",
]

with open(output_file, "w", newline="") as f_csv:
    writer = csv.writer(f_csv)
    writer.writerow(titles)

    # Walk the ID columns and the sum arrays in lockstep, one output row per
    # reference row. The numeric IDs are written as plain ints, ch_list as the
    # stored string, and the sums as accumulated.
    writer.writerows(
        [int(trap_id), int(tpc), int(adc), ch_list, meas, meas_nolt, exp, exp_nolt]
        for trap_id, tpc, adc, ch_list, meas, meas_nolt, exp, exp_nolt in zip(
            ids_df["global_trap_id"],
            ids_df["tpc"],
            ids_df["adc"],
            ids_df["ch_list"],
            sum_PE_meas,
            sum_PE_meas_noLTcr,
            sum_PE_exp,
            sum_PE_exp_noLTcr,
        )
    )

print(f"Wrote: {output_file}")
print(f"Summed over {len(file_list_pdes)} files, rows={nrows}")


Wrote: pde_sum_all_files.csv
Summed over 699 files, rows=80
