From b446b7a326db55821e982c45c12f4b5c44dc8164 Mon Sep 17 00:00:00 2001
From: Iuri Oksuzian <oksuzian@mu2egpvm05.fnal.gov>
Date: Thu, 18 Sep 2025 14:42:31 -0500
Subject: [PATCH 1/6] Add back epilogs to JobConfig/cosmic/NoField.fcl

---
 JobConfig/cosmic/NoField.fcl | 7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/JobConfig/cosmic/NoField.fcl b/JobConfig/cosmic/NoField.fcl
index 77597ba8..006f3f7e 100644
--- a/JobConfig/cosmic/NoField.fcl
+++ b/JobConfig/cosmic/NoField.fcl
@@ -44,6 +44,13 @@ physics.producers.g4run.Mu2eG4CommonCut: @local::Cosmic.Mu2eG4CommonCutCosmicNoF
 physics.producers.FindMCPrimary.PrimaryGenIds: ["CosmicCRY", "CosmicCORSIKA"]
 # since these data aren't resampled we have to apply the cosmic time offset here
 physics.producers.cosmicTimeOffset.cosmicModuleLabel : "generate"
+
+#
+# final configuration
+#
+#include "Production/JobConfig/common/epilog.fcl"
+#include "Production/JobConfig/primary/epilog.fcl"
+#
 # need to offset the GenParticles
 
 physics.end_paths : [ EndPath ]

From d1c247705ade4bff4b226024bb80a2fc3fa60364 Mon Sep 17 00:00:00 2001
From: Iuri Oksuzian <oksuzian@gmail.com>
Date: Fri, 19 Sep 2025 09:15:28 -0500
Subject: [PATCH 2/6] Add extrapolation to samDatasetsSummary.sh for speed up

---
 Scripts/POMS/samDatasetsSummary.sh | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/Scripts/POMS/samDatasetsSummary.sh b/Scripts/POMS/samDatasetsSummary.sh
index 715c502f..7da04b2b 100755
--- a/Scripts/POMS/samDatasetsSummary.sh
+++ b/Scripts/POMS/samDatasetsSummary.sh
@@ -1,12 +1,19 @@
 #!/bin/bash
 # usage
 if [[ -z "$1" ]]; then
-  echo "Usage: $0 <dataset>" >&2
+  echo "Usage: $0 <dataset> [--sample-files N]" >&2
+  echo "  --sample-files N: Number of files to sample for generated events calculation (default: 10)" >&2
   exit 1
 fi
 
 # Get the dataset name
 dataset="$1"
+
+# Parse command line arguments
+sample_files=10
+if [[ "$2" == "--sample-files" && -n "$3" ]]; then
+  sample_files="$3"
+fi
 # Obtain summary once
 summary_txt=$(samweb list-definition-files --summary "$dataset" 2>/dev/null)
 nfiles=$(echo "$summary_txt" | awk '/File count:/ {print $3}')
@@ -24,9 +31,21 @@ if (( nfiles == 0 )); then
     exit 1
 fi
 
-# Calculate total generated events from all files
-generated=$(samweb list-definition-files "$dataset" 2>/dev/null | \
-  xargs -n1 samweb get-metadata | awk '/dh.gencount/ { sum += $2 } END { print sum+0 }')
+# Calculate total generated events by extrapolating from sampled files
+# OPTIMIZATION: only the first $sample_files files are queried for dh.gencount
+sample_sum=$(samweb list-definition-files "$dataset" 2>/dev/null | \
+  head -n "$sample_files" | while read -r file; do
+    samweb get-metadata "$file" 2>/dev/null | awk '/dh.gencount/ {print $2}'
+  done | awk '{sum += $1} END {print sum+0}')
+
+# Extrapolate to all files; head yields at most nfiles entries, so cap the divisor at nfiles
+if (( sample_sum > 0 && sample_files > 0 )); then
+  avg_per_file=$((sample_sum / (sample_files < nfiles ? sample_files : nfiles)))
+  generated=$((avg_per_file * nfiles))
+  echo "Debug: Sampled $sample_files files for dh.gencount, sum=$sample_sum, avg=$avg_per_file, extrapolated=$generated" >&2
+else
+  generated=0
+fi
 
 printf "Triggered: %s\nGenerated: %s\nFiles: %s\nSize: %s\n" "$triggered" "$generated" "$nfiles" "$size"
 

From cfa41513b70728ad042d54b902c140a182709050 Mon Sep 17 00:00:00 2001
From: sophieMu2e <sophie@fnal.gov>
Date: Tue, 23 Sep 2025 12:22:57 -0500
Subject: [PATCH 3/6] new scripts work

---
 JobConfig/ensemble/MakeSignalMDS_tutorial.md  |  52 ++++
 JobConfig/ensemble/fcl/split.fcl              |  30 +++
 JobConfig/ensemble/python/calculateEvents.py  |   2 +-
 JobConfig/ensemble/python/normalizations.py   |   8 +-
 .../ensemble/scripts/Stage3_addsignal.sh      |   0
 .../ensemble/scripts/Stage3_addsignal_easy.sh | 233 ++++++++++++++++++
 JobConfig/ensemble/scripts/combine_ntuples.sh |  79 ++++++
 Scripts/POMS/samDatasetsSummary.sh            |  27 +-
 Scripts/gen_Mix.sh                            |  20 +-
 9 files changed, 430 insertions(+), 21 deletions(-)
 create mode 100644 JobConfig/ensemble/MakeSignalMDS_tutorial.md
 create mode 100644 JobConfig/ensemble/fcl/split.fcl
 mode change 100755 => 100644 JobConfig/ensemble/scripts/Stage3_addsignal.sh
 create mode 100755 JobConfig/ensemble/scripts/Stage3_addsignal_easy.sh
 create mode 100755 JobConfig/ensemble/scripts/combine_ntuples.sh

diff --git a/JobConfig/ensemble/MakeSignalMDS_tutorial.md b/JobConfig/ensemble/MakeSignalMDS_tutorial.md
new file mode 100644
index 00000000..1f13f175
--- /dev/null
+++ b/JobConfig/ensemble/MakeSignalMDS_tutorial.md
@@ -0,0 +1,52 @@
+# Introduction
+
+# Tutorial 
+
+1. Make a new directory in your working directory. You should ensure you have access to Production (either via a musing or a clone). Call this something like "ensemble_MDS2c_CeMLL_1e-14_2weeks" altering the fields as is applicable to what you want to make.
+
+2. Enter the new directory. Run the following command:
+
+```
+Stage3_addsignal_easy.sh --known MDS2c --signal CeMLeadingLog --rate 1e-14 --nexp 10 --chooselivetime 1209600
+```
+
+here the parameters are:
+
+* ``known``: the tag of the mixed background sample you wish to sample from.
+* ``signal``: must be the primary name of the signal type you want to sample (e.g. CeMLeadingLog)
+* ``rate``: chosen signal rate (e.g. 1e-13)
+* ``nexp``: how many pseudo experiments (i.e. random samplings) you want to make
+* ``chooselivetime``: livetime to sample, in seconds (check the config to ensure you don't try to make more than is available)
+
+The output of this command will include
+
+* a set of nexp mcs files; these contain random sets of expected signal events (sampled from a much larger set) and include Poisson statistical variations.
+* a set of nexp nts files that are EventNtuples of the analogous mcs files (names are the same)
+* nexp lists ``filenames_ChosenMixed_i`` that contain random sets of MDS known ntuples and the new signal ntuples
+* a new directory called ``fcl`` contains the splitting and ntupling fcls for reference
+
+3. Now that you have random sets of ntuples, combine them into a merged dataset (for some blinding effect). To do that, run a second script:
+
+```
+combine_ntuples.sh 1 MDS2c
+```
+
+where the first argument is the iteration of the list (1 to nexp) and the second argument is the MDS tag version name (should be the same as before). You will need to run this once for each iteration (1 to nexp).
+
+In the current directory you will see that for every nexp (i) you will now see a directory: ``merged_files_i``.
+
+In this directory there will be a set of files (merge factor can be altered within the combine_ntuples.sh script). The filenames will be for example:
+
+```
+nts.mu2e.ensembleMDS2cMix1BB_CeMLeadingLog_1e-14_1223190.2_16.root
+```
+
+where most of this is obvious: 1223190 is the livetime of the sample in seconds, followed by the iteration number (2) and the index of the merged file within that iteration (16).
+
+4. You should be able to analyze this file list as you would any other set of files. If you prefer remote/xrootd etc. file access you will need to upload them to SAM, but I recommend keeping them in your personal directories.
+
+
+
+
+
+
diff --git a/JobConfig/ensemble/fcl/split.fcl b/JobConfig/ensemble/fcl/split.fcl
new file mode 100644
index 00000000..a0e65290
--- /dev/null
+++ b/JobConfig/ensemble/fcl/split.fcl
@@ -0,0 +1,30 @@
+#include "Offline/fcl/minimalMessageService.fcl"
+
+process_name: artsplit
+
+# read only the first 100 events
+source: {
+  module_type: RootInput
+  fileNames: @nil
+  //[ "/pnfs/mu2e/tape/phy-sim/mcs/mu2e/CeMLeadingLogOnSpillTriggered/MDC2020au_perfect_v1_3/art/71/3e/mcs.mu2e.CeMLeadingLogOnSpillTriggered.MDC2020au_perfect_v1_3.001202_00001366.art" ]
+  maxEvents: 0
+  //1150
+}
+
+services: { 
+  message: @local::default_message 
+}
+
+physics: { 
+  e1: [out]  
+  end_paths: [e1] 
+  trigger_paths: []
+}
+
+# write everything you read to a new file
+outputs: {
+  out: {
+    module_type: RootOutput
+    fileName: "first2300_2.art"
+  }
+}
diff --git a/JobConfig/ensemble/python/calculateEvents.py b/JobConfig/ensemble/python/calculateEvents.py
index 83e3f07e..e0271378 100755
--- a/JobConfig/ensemble/python/calculateEvents.py
+++ b/JobConfig/ensemble/python/calculateEvents.py
@@ -7,7 +7,7 @@ def main(args):
       getPOT(float(args.livetime), str(args.BB),True)
     if(args.prc == "CeMLeadingLog" or args.prc == "CePLeadingLog"):
       Yield = ce_normalization(float(args.livetime), float(args.rue), str(args.BB))
-      print("CeP",Yield)
+      print("Ce",Yield)
     if(args.prc == "GetRMUE"):
       Yield = get_ce_rmue(float(args.livetime), float(args.nsig), str(args.BB))
       print(Yield)
diff --git a/JobConfig/ensemble/python/normalizations.py b/JobConfig/ensemble/python/normalizations.py
index cb4907f0..23dc415b 100755
--- a/JobConfig/ensemble/python/normalizations.py
+++ b/JobConfig/ensemble/python/normalizations.py
@@ -3,7 +3,9 @@
 import argparse
 import ROOT
 import math
+import random
 import os
+import numpy as np
 
 # numbers
 captures_per_stopped_muon = 0.609 # from AL capture studies
@@ -101,9 +103,9 @@ def getPOT(onspilltime, run_mode = '1BB',printout=False, frac=1): #livetime in s
 # get CE normalization:
 def ce_normalization(onspilltime, rue, run_mode = '1BB'):
     POT = getPOT(onspilltime, run_mode)
-    
-    #print(f"Expected CE's {POT * target_stopped_mu_per_POT * captures_per_stopped_muon * rue}")
-    return POT * target_stopped_mu_per_POT * captures_per_stopped_muon * rue
+    N_CE = POT * target_stopped_mu_per_POT * captures_per_stopped_muon * rue
+    Poisson = np.random.poisson(lam=(N_CE ))
+    return  Poisson
 
 # get DIO normalization:
 def dio_normalization(onspilltime, emin, run_mode = '1BB'):
diff --git a/JobConfig/ensemble/scripts/Stage3_addsignal.sh b/JobConfig/ensemble/scripts/Stage3_addsignal.sh
old mode 100755
new mode 100644
diff --git a/JobConfig/ensemble/scripts/Stage3_addsignal_easy.sh b/JobConfig/ensemble/scripts/Stage3_addsignal_easy.sh
new file mode 100755
index 00000000..aa46ac03
--- /dev/null
+++ b/JobConfig/ensemble/scripts/Stage3_addsignal_easy.sh
@@ -0,0 +1,233 @@
+#!/usr/bin/bash
+usage() { echo "Usage: $0
+  e.g. Stage3_addsignal_easy.sh --known MDS2a --signal CeMLeadingLog --rate 1e-13 --nexp 3
+  usage:
+  --owner = the username of your account (or mu2e if you are using mu2epro);
+  --known = known physics tag e.g. MDS2a
+  --rate = chosen rate e.g. 1e-14 (note this could be edited during the process so check print outs)
+  --signal = primary name of chosen signal e.g. CeMLeadingLog for the e- ce leadinglog samples
+  --release = SimJob tag e.g. MDC2020aw
+  --dbpurpose = db purpose of input mcs files e.g. perfect or best
+  --dbversion = db version e.g. v1_3
+  --nexp = number of sets of mixed samples or 'pseudo experiments' to make default is 1
+  --chooselivetime = choose a livetime in seconds e.g. 86000
+  
+  NOTE: assumes signal and known are the same versions
+"
+}
+
+# Function: Exit with error.
+exit_abnormal() {
+  usage
+  exit 1
+}
+OWNER="mu2e"
+KNOWN="MDS2a" #background sample tag
+RATE=1e-13
+SIGNAL="CeMLeadingLog" #name as given to primary during production
+RELEASE="MDC2020ba"
+DBPURPOSE="best"
+DBVERSION="v1_3"
+NEXP=1
+CHOOSE=0.
+SETUP="" #musing path
+
+while getopts ":-:" options; do
+  case "${options}" in
+    -)
+      case "${OPTARG}" in
+        owner)
+          OWNER=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        known)
+          KNOWN=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        rate)
+          RATE=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        signal)
+          SIGNAL=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        release)
+          RELEASE=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        dbversion)
+          DBVERSION=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        dbpurpose)
+          DBPURPOSE=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        release)
+          RELEASE=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+       nexp)
+          NEXP=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        chooselivetime)
+          CHOOSE=${!OPTIND} OPTIND=$(( $OPTIND + 1 ))
+          ;;
+        *)
+          echo "Unknown option " ${OPTARG}
+          exit_abnormal
+          ;;
+        esac;;
+    :)                                    # If expected argument omitted:
+      echo "Error: -${OPTARG} requires an argument."
+      exit_abnormal                       # Exit abnormally.
+      ;;
+    *)                                    # If unknown (any other) option:
+      exit_abnormal                       # Exit abnormally.
+      ;;
+    esac
+done
+
+# step 1: check livetime of the tag
+GEN_LIVETIME=""
+GEN_JOBS=""
+# extract config file from disk:
+CONFIG=${KNOWN}.txt
+
+echo "running: mu2eDatasetFileList cnf.${OWNER}.ensemble${KNOWN}.${RELEASE}${CURRENT}.txt"
+
+mu2eDatasetFileList cnf.${OWNER}.ensemble${KNOWN}.${RELEASE}${CURRENT}.txt >> config.txt
+# Read each line (file path) from the input file
+while IFS= read -r file_path; do
+    if [ -f "$file_path" ]; then
+        cp "$file_path" ${KNOWN}.txt
+    fi
+done < config.txt
+
+while IFS='= ' read -r col1 col2
+do 
+    if [[ "${col1}" == "livetime" ]] ; then
+      GEN_LIVETIME=${col2}
+      LIVETIME=${col2}
+    fi
+    if [[ "${col1}" == "njobs" ]] ; then
+      GEN_JOBS=${col2}
+    fi
+    if [[ "${col1}" == "BB" ]] ; then
+      BB=${col2}
+    fi
+    
+done <${CONFIG}
+echo "extracted config for ${KNOWN}"
+echo "found ${GEN_LIVETIME} ${BB}"
+rm *.csv
+# if user has chosen to sample only a smaller amount of livetime then override
+if (awk "BEGIN {exit !(${CHOOSE} != 0)}") ; then
+  echo "livetime chosen to be ${CHOOSE} s"
+  LIVETIME=$(awk "BEGIN {print ${CHOOSE}}" LIVETIME="${CHOOSE}")
+fi
+if (awk "BEGIN {exit !(${CHOOSE} > ${GEN_LIVETIME})}") ; then
+  echo "ERROR: users chosen livetime is larger than total sample size, defaulting to ${GEN_LIVETIME} s"
+  LIVETIME=$(awk "BEGIN {print ${GEN_LIVETIME}}" LIVETIME="${GEN_LIVETIME}")
+fi
+echo "livetime ${LIVETIME}s is initated, watch for changes...."
+
+# find how many known files are for livetime
+N_TOTAL_KNOWN=$(samDatasetsSummary.sh mcs.${OWNER}.ensemble${KNOWN}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art  | awk '/Files/ {print $2}')
+LIVETIME_PER_FILE=$(awk "BEGIN {printf \"%.0f\", ${GEN_LIVETIME}/${N_TOTAL_KNOWN}}")
+echo "livetime per file ${LIVETIME_PER_FILE}"
+N_KNOWN_FILES_TO_USE=$(awk "BEGIN {printf \"%.0f\", ${LIVETIME}/${LIVETIME_PER_FILE}}")
+echo "${N_KNOWN_FILES_TO_USE} files of ${KNOWN} to be used with livetime of ${LIVETIME} s"
+
+# actual livetime that will be used for normalization of signal depends on int number of files
+LIVETIME=$(awk "BEGIN {printf \"%.0f\", ${N_KNOWN_FILES_TO_USE}*${LIVETIME_PER_FILE}}")
+echo "IMPORTANT: livetime ${LIVETIME}s is selected based on need for integar number of files"
+
+# understand how many events are present, and what fraction we need to sample
+echo "accessing " mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art
+NGEN=10000000
+#(samDatasetsSummary.sh mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art  | awk '/Generated/ {print $2}') #FIXME
+
+echo "sample mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art contains ${NGEN} gen events"
+
+# recheck rate for new Nfiles
+#RATE=$(calculateEvents.py --livetime ${LIVETIME} --BB ${BB} --nsig ${NSIG} --prc "GetRATE" )
+#echo "can only sample full files, sampling ${N_SIGNAL_FILES_TO_USE} files so ${NSIG} and ${RATE}"
+
+#need to store this somewhere, amend the .config and make an associated config for combined sample with nexp, rate, livetime_rate added at end of original.
+echo "======= combined samples info =========">> ${KNOWN}.txt
+echo "signal= ${SIGNAL}">> ${KNOWN}.txt
+echo "Rmue= ${RATE}">> ${KNOWN}.txt
+echo "livetime_combined= ${LIVETIME}">> ${KNOWN}.txt
+echo "npseudo_experiments= ${NEXP}">> ${KNOWN}.txt
+
+# build complete list
+rm filenames_All_${SIGNAL}
+rm filenames_All_${KNOWN}
+rm filenames_*
+echo "looking for mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art"
+mu2eDatasetFileList "mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art" > filenames_All_${SIGNAL} 
+
+mu2eDatasetFileList nts.mu2e.ensemble${KNOWN}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}_v06_06_00.root > filenames_All_${KNOWN}
+
+# step: split the signal files to get an exact number:
+i=1
+while [ $i -le ${NEXP} ]
+do
+  # remove old files
+  rm ntuple_$i.fcl
+  rm splitter_$i.fcl
+  
+  # calculate yield of signal for the chosen rate, if > 0 then proceed --> use python scripts
+  NSIG=$(calculateEvents.py --livetime ${LIVETIME} --prc ${SIGNAL} --BB ${BB} --rue ${RATE})
+  echo "${RATE} for ${BB} and ${LIVETIME} s means ${NSIG} events will be sampled"
+  NSIG=$(awk "BEGIN {printf \"%.0f\", ${NSIG}}")
+
+  # calculate number of files
+  N_TOTAL_SIGNAL=$(samDatasetsSummary.sh mcs.${OWNER}.${SIGNAL}Mix${BB}Triggered.${RELEASE}_${DBPURPOSE}_${DBVERSION}.art  | awk '/Files/ {print $2}')
+  EVENTS_PER_FILE=$(awk "BEGIN {printf \"%.0f\", ${NGEN}/${N_TOTAL_SIGNAL}}")
+  echo "signal sample has ${N_TOTAL_SIGNAL} files with ${EVENTS_PER_FILE} events per file"
+  N_SIGNAL_FILES_TO_USE=$(awk "BEGIN {printf \"%.0f\", ${NSIG}/${EVENTS_PER_FILE}}")
+  
+  # if its < 1 file the above will be 0, so we need to make sure we use at least 1 file here
+  if (( N_SIGNAL_FILES_TO_USE == 0 )); then
+    N_SIGNAL_FILES_TO_USE=1
+  fi
+  echo "based on requested rate, will use ${N_SIGNAL_FILES_TO_USE} signal files"
+  
+  # build the splitter .fcl file and run on the chosen samples
+  echo "will sample ${N_SIGNAL_FILES_TO_USE} signal files"
+  # randomly select a file here
+  shuf -n ${N_SIGNAL_FILES_TO_USE} filenames_All_${SIGNAL} > temp
+  shuf temp > filenames_ChosenSig_$i
+  rm temp
+  # construct .fcl
+  echo "#include \"Production/JobConfig/ensemble/fcl/split.fcl\"" > splitter_$i.fcl
+  echo "source.fileNames: [" >> splitter_$i.fcl
+  while IFS= read -r line; do
+    echo "adding file: " $line
+    echo "\"$line\"" >> splitter_$i.fcl
+    if (( ${N_SIGNAL_FILES_TO_USE} > 1 )); then
+      echo "," >> splitter_$i.fcl
+    fi
+  done < "filenames_ChosenSig_$i"
+  echo "]" >> splitter_$i.fcl
+  echo "source.maxEvents: ${NSIG}" >> splitter_$i.fcl
+  echo "outputs.out.fileName: \"mcs.${OWNER}.${SIGNAL}Mix${BB}TriggeredSplit.${RELEASE}_${DBPURPOSE}_${DBVERSION}.${i}.art\"" >> splitter_$i.fcl
+  cmd="mu2e -c splitter_$i.fcl"
+  echo "Running: $cmd"
+  # run the splitter: cmd holds the command line as a string and is executed only here
+  $cmd
+  
+  # make the ntuples (first echo truncates so a stale ntuple_$i.fcl can never be appended to)
+  echo "#include \"EventNtuple/fcl/from_mcs-mockdata.fcl\"" > ntuple_$i.fcl
+  echo "services.TFileService.fileName: \"nts.${OWNER}.${SIGNAL}Mix${BB}TriggeredSplit.${RELEASE}_${DBPURPOSE}_${DBVERSION}.${i}.root\"" >> ntuple_$i.fcl
+  cmd="mu2e -c ntuple_$i.fcl mcs.${OWNER}.${SIGNAL}Mix${BB}TriggeredSplit.${RELEASE}_${DBPURPOSE}_${DBVERSION}.${i}.art"
+  echo "Running: $cmd"
+  $cmd
+  ls nts.${OWNER}.${SIGNAL}Mix${BB}TriggeredSplit.${RELEASE}_${DBPURPOSE}_${DBVERSION}.${i}.root > temp
+
+  # create randomly mixed list of ntuples
+  shuf -n ${N_KNOWN_FILES_TO_USE} filenames_All_${KNOWN} >> temp
+  shuf temp > filenames_ChosenMixed_$i
+  rm temp
+  i=$((i + 1))
+
+done
+mkdir fcl
+mv *.fcl fcl
+echo "finished compiling list of chosen ntuples"
+rm *.csv
diff --git a/JobConfig/ensemble/scripts/combine_ntuples.sh b/JobConfig/ensemble/scripts/combine_ntuples.sh
new file mode 100755
index 00000000..ab79f1a0
--- /dev/null
+++ b/JobConfig/ensemble/scripts/combine_ntuples.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# usage: "combine_ntuples.sh 1 MDS2c" where first arg is the iteration and second is the known tag
+i=$1
+KNOWN=$2
+CONFIG=${KNOWN}.txt
+BB=""
+SIGNAL=""
+RMUE=""
+
+while IFS='= ' read -r col1 col2
+do 
+    if [[ "${col1}" == "livetime_combined" ]] ; then
+      LIVETIME=${col2}
+    fi
+    if [[ "${col1}" == "Rmue" ]] ; then
+      RMUE=${col2}
+    fi
+    if [[ "${col1}" == "signal" ]] ; then
+      SIGNAL=${col2}
+    fi
+    if [[ "${col1}" == "BB" ]] ; then
+      BB=${col2}
+    fi
+done <${CONFIG}
+
+
+INPUT_LIST="filenames_ChosenMixed_$i"
+OUTPUT_LIST="merged_list_$i.txt"
+OUTPUT_DIR="merged_files_$i"
+OUTNAME="nts.mu2e.ensemble${KNOWN}Mix${BB}_${SIGNAL}_${RMUE}_${LIVETIME}.$i"
+
+FILES_PER_MERGE=2 # Set the number of files to merge at a time
+
+# Create the output directory if it doesn't exist
+mkdir -p "$OUTPUT_DIR"
+
+# Clear the output list file before starting
+> "$OUTPUT_LIST"
+
+# --- Main Logic ---
+
+# Use a while loop to read input files and an array to group them
+file_group=()
+counter=0
+group_counter=1
+
+while IFS= read -r root_file; do
+  file_group+=("$root_file")
+  counter=$((counter + 1))
+
+  # Merge when the group is full or all files are processed
+  if [[ ${#file_group[@]} -eq $FILES_PER_MERGE ]] || [[ -z "$root_file" && ${#file_group[@]} -gt 0 ]]; then
+    
+    # Define the output file name
+    output_filename="${OUTPUT_DIR}/${OUTNAME}_${group_counter}.root"
+
+    echo "Merging group $group_counter: ${#file_group[@]} files into $output_filename"
+    hadd -f "$output_filename" "${file_group[@]}"
+
+    # Add the new file to the output list
+    echo "$output_filename" >> "$OUTPUT_LIST"
+
+    # Reset for the next group
+    file_group=()
+    group_counter=$((group_counter + 1))
+  fi
+done < "$INPUT_LIST"
+
+# Check for any remaining files in the last group
+if [[ ${#file_group[@]} -gt 0 ]]; then
+  output_filename="${OUTPUT_DIR}/${OUTNAME}_${group_counter}.root"
+  echo "Merging final group: ${#file_group[@]} files into $output_filename"
+  hadd -f "$output_filename" "${file_group[@]}"
+  echo "$output_filename" >> "$OUTPUT_LIST"
+fi
+
+echo "Merge process complete. Merged files are in '$OUTPUT_DIR/'."
+echo "List of merged files is in '$OUTPUT_LIST'."
diff --git a/Scripts/POMS/samDatasetsSummary.sh b/Scripts/POMS/samDatasetsSummary.sh
index 715c502f..7da04b2b 100755
--- a/Scripts/POMS/samDatasetsSummary.sh
+++ b/Scripts/POMS/samDatasetsSummary.sh
@@ -1,12 +1,19 @@
 #!/bin/bash
 # usage
 if [[ -z "$1" ]]; then
-  echo "Usage: $0 <dataset>" >&2
+  echo "Usage: $0 <dataset> [--sample-files N]" >&2
+  echo "  --sample-files N: Number of files to sample for generated events calculation (default: 10)" >&2
   exit 1
 fi
 
 # Get the dataset name
 dataset="$1"
+
+# Parse command line arguments
+sample_files=10
+if [[ "$2" == "--sample-files" && -n "$3" ]]; then
+  sample_files="$3"
+fi
 # Obtain summary once
 summary_txt=$(samweb list-definition-files --summary "$dataset" 2>/dev/null)
 nfiles=$(echo "$summary_txt" | awk '/File count:/ {print $3}')
@@ -24,9 +31,21 @@ if (( nfiles == 0 )); then
     exit 1
 fi
 
-# Calculate total generated events from all files
-generated=$(samweb list-definition-files "$dataset" 2>/dev/null | \
-  xargs -n1 samweb get-metadata | awk '/dh.gencount/ { sum += $2 } END { print sum+0 }')
+# Calculate total generated events by extrapolating from sampled files
+# OPTIMIZATION: only the first $sample_files files are queried for dh.gencount
+sample_sum=$(samweb list-definition-files "$dataset" 2>/dev/null | \
+  head -n "$sample_files" | while read -r file; do
+    samweb get-metadata "$file" 2>/dev/null | awk '/dh.gencount/ {print $2}'
+  done | awk '{sum += $1} END {print sum+0}')
+
+# Extrapolate to all files; head yields at most nfiles entries, so cap the divisor at nfiles
+if (( sample_sum > 0 && sample_files > 0 )); then
+  avg_per_file=$((sample_sum / (sample_files < nfiles ? sample_files : nfiles)))
+  generated=$((avg_per_file * nfiles))
+  echo "Debug: Sampled $sample_files files for dh.gencount, sum=$sample_sum, avg=$avg_per_file, extrapolated=$generated" >&2
+else
+  generated=0
+fi
 
 printf "Triggered: %s\nGenerated: %s\nFiles: %s\nSize: %s\n" "$triggered" "$generated" "$nfiles" "$size"
 
diff --git a/Scripts/gen_Mix.sh b/Scripts/gen_Mix.sh
index 9ad8aeea..8cb41c6a 100755
--- a/Scripts/gen_Mix.sh
+++ b/Scripts/gen_Mix.sh
@@ -246,27 +246,21 @@ if [ "${PRIMARY_DESC}" == "NoPrimary" ]; then
   echo '#include "Production/JobConfig/mixing/NoPrimary.fcl"' >> mix.fcl
 fi
 
-# Override dts filters conditioned on primary
-if [ "${ENSEMBLE}" == 0 ]; then
-  if [ "${PRIMARY_DESC}" == *"DIOtail"* ]; then
-    filter="Production/JobConfig/mixing/filters/DIOtail.fcl"
-  fi
-  els
-  filter="Production/JobConfig/mixing/filters/${PRIMARY_DESC}.fcl"
-  if test -f "${PRODUCTION_INC}/${filter}"; then
-    echo "#include \"${filter}\"" >> mix.fcl
-  fi
-fi
 
 # Override dts filters conditioned on primary
 if [ "${ENSEMBLE}" == 0 ]; then
-  if [ "${PRIMARY_DESC}" == *"DIOtail"* ]; then
+  if [[ "${PRIMARY_DESC}" == *"DIOtail"* ]]; then
     filter="Production/JobConfig/mixing/filters/DIOtail.fcl"
+    # Extract the numeric suffix from name
+    [[ "${PRIMARY_DESC}" =~ [0-9]+$ ]]
+    minE="${BASH_REMATCH[0]}"
+    echo "Extracted number: $minE"
+    
   else
     filter="Production/JobConfig/mixing/filters/${PRIMARY_DESC}.fcl"
   fi
   if test -f "${PRODUCTION_INC}/${filter}"; then
-    echo "#include \"${filter}\"" >> mix.fcl
+    
   fi
 fi
 

From 6b28c3f85a03598ea0e3ff5a063e958e5bfcc2ef Mon Sep 17 00:00:00 2001
From: sophieMu2e <sophie@fnal.gov>
Date: Tue, 23 Sep 2025 12:32:40 -0500
Subject: [PATCH 4/6] new scripts work

---
 JobConfig/ensemble/fcl/split.fcl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/JobConfig/ensemble/fcl/split.fcl b/JobConfig/ensemble/fcl/split.fcl
index a0e65290..1d152d11 100644
--- a/JobConfig/ensemble/fcl/split.fcl
+++ b/JobConfig/ensemble/fcl/split.fcl
@@ -2,13 +2,11 @@
 
 process_name: artsplit
 
-# read only the first 100 events
+# read only the first n events
 source: {
   module_type: RootInput
   fileNames: @nil
-  //[ "/pnfs/mu2e/tape/phy-sim/mcs/mu2e/CeMLeadingLogOnSpillTriggered/MDC2020au_perfect_v1_3/art/71/3e/mcs.mu2e.CeMLeadingLogOnSpillTriggered.MDC2020au_perfect_v1_3.001202_00001366.art" ]
   maxEvents: 0
-  //1150
 }
 
 services: { 
@@ -25,6 +23,6 @@ physics: {
 outputs: {
   out: {
     module_type: RootOutput
-    fileName: "first2300_2.art"
+    fileName: @nil
   }
 }

From f369bec950dcad135cee0f67ee117247e2ee93f5 Mon Sep 17 00:00:00 2001
From: David Nathan Brown <dave_brown@lbl.gov>
Date: Tue, 30 Sep 2025 16:51:13 -0700
Subject: [PATCH 5/6] Override the default geometry from Offline

---
 JobConfig/common/epilog.fcl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/JobConfig/common/epilog.fcl b/JobConfig/common/epilog.fcl
index b666c06e..3d48940a 100644
--- a/JobConfig/common/epilog.fcl
+++ b/JobConfig/common/epilog.fcl
@@ -6,4 +6,5 @@ services.TimeTracker.printSummary: true
 services.scheduler.wantSummary: true
 #show summary of error logger
 services.message.destinations.log.outputStatistics : true
-
+# define the default geometry. This overrides the Offline default
+services.GeometryService.inputFile: "Offline/Mu2eG4/geom/geom_common.txt"

From 3863258596b6eab107df48143630b43ce82d5899 Mon Sep 17 00:00:00 2001
From: sophieMu2e <sophie@fnal.gov>
Date: Thu, 2 Oct 2025 12:49:57 -0500
Subject: [PATCH 6/6] gen mix fix

---
 Scripts/gen_Mix.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Scripts/gen_Mix.sh b/Scripts/gen_Mix.sh
index 8cb41c6a..77a701f9 100755
--- a/Scripts/gen_Mix.sh
+++ b/Scripts/gen_Mix.sh
@@ -254,13 +254,11 @@ if [ "${ENSEMBLE}" == 0 ]; then
     # Extract the numeric suffix from name
     [[ "${PRIMARY_DESC}" =~ [0-9]+$ ]]
     minE="${BASH_REMATCH[0]}"
-    echo "Extracted number: $minE"
-    
   else
     filter="Production/JobConfig/mixing/filters/${PRIMARY_DESC}.fcl"
   fi
   if test -f "${PRODUCTION_INC}/${filter}"; then
-    
+    echo "#include \"${filter}\"" >> mix.fcl
   fi
 fi