In [None]:
import pandas 
import uproot 
import matplotlib.pyplot as plt 
import pandas as pd 
import numpy as np 
import matplotlib
import urllib.request, json
from scipy.optimize import curve_fit

from tqdm.notebook import tqdm
import time
import sys
import multiprocessing

In [None]:
# Global matplotlib font size for every figure in this notebook.
plt.rcParams.update({"font.size": 14})

In [None]:
# --- Sample selection --------------------------------------------------------
OFFBEAM = True   # beam-OFF sample when True, beam-ON sample when False
RUN1 = False     # Run 1 inputs when True, Run 2 inputs when False

# Minbias prescale (NuMI commissioning+Run 1): 60 with beam ON, 20 with beam OFF
MINBIAS_PRESCALE = 20 if OFFBEAM else 60

# --- Locations ---------------------------------------------------------------
datadir = "/icarus/data/users/gputnam/DMCP2023G/normdata/"

# Prefix (data directory + run tag) prepended to the saved figure names
savedir = datadir + ("run1_" if RUN1 else "run2_")

# Switch consulted by the later cells before writing figures / the CSV
dosave = True

# Output CSV path; the run number and beam state are encoded in the file name
outf = datadir + "Run%i_%sBeam.csv" % (
    1 if RUN1 else 2,
    "Off" if OFFBEAM else "On",
)
outf

In [None]:
# Choose the inputs for the configured (RUN1, OFFBEAM) combination:
#   rootfiles -- norminfo ROOT file names (opened relative to datadir)
#   filelists -- file-list names (read relative to datadir); later cells use
#                entry i together with the DataFrame loaded from rootfiles[i]
#   beamfile  -- beam dataframe file name for the run period
if RUN1 and not OFFBEAM:
    # Run 1, OnBeam
    # In Run 1, we need to disentangle minbias and majority triggers
    rootfiles = [
        "norminfo_Run1_Onbeam_majority.root",
        "norminfo_Run1_Onbeam_minbias.root",
    ]
    filelists = [
        "ICARUSRun1OnBeamMajority_2022A_files.list",
        "ICARUSRun1OnBeamMinbias_2022A_files.list",
    ]
elif RUN1:
    # Run 1, OffBeam
    # In Run 1, we need to disentangle minbias and majority triggers
    rootfiles = [
        "norminfo_Run1_Offbeam_majority.root",
        "norminfo_Run1_Offbeam_minbias.root",
    ]
    filelists = [
        "ICARUSRun1OffBeamMajority_2022A_files.list",
        "ICARUSRun1OffBeamMinbias_2022A_files.list",
    ]
elif not OFFBEAM:
    # Run 2, OnBeam
    rootfiles = ["norminfo_Run2_Onbeam.root"]
    filelists = ["ICARUSRun2DataOnBeamMajority_stage0_disk_files.list"]
else:
    # Run 2, OffBeam
    rootfiles = ["norminfo_Run2_Offbeam.root"]
    filelists = ["ICARUSRun2DataOffBeamMajority_stage0_disk_files.list"]

beamfile = "run1_beam.df" if RUN1 else "run2_beam.df"



In [None]:
# Open every norminfo ROOT file (handles are kept in `fs`) ...
fs = [uproot.open(datadir + rootname) for rootname in rootfiles]

# ... and load the "icarusnumi"/"TriggerInfo" entry of each one as a pandas
# object, one per ROOT file, in the same order as `rootfiles`.
dfts = [
    rootfile["icarusnumi"]["TriggerInfo"].arrays(library="pd")
    for rootfile in fs
]

In [None]:
# files_per_list[i] holds the lines of filelists[i], in file order, with the
# trailing newline removed.  Blank lines are kept so positions stay unchanged.
files_per_list = []
for filelist in filelists:
    with open(datadir + filelist) as listing:
        files_per_list.append([entry.rstrip("\n") for entry in listing])

In [None]:
# Shift each DataFrame's `ifile` by the number of files in all preceding file
# lists, so that `ifile` can be used as an index into the flattened `files`
# list built below.
filecount = 0

for i in range(len(files_per_list)):
    # Keep the untouched per-list value in `ifile_local` the first time through.
    # Re-running this cell then recomputes the offset from the original values
    # instead of stacking the offset a second time.
    if "ifile_local" not in dfts[i].columns:
        dfts[i]["ifile_local"] = dfts[i]["ifile"]
    dfts[i]["ifile"] = dfts[i]["ifile_local"] + filecount
    filecount += len(files_per_list[i])

# All file names, list after list, in the order the lists were read
files = [item for sublist in files_per_list for item in sublist]

In [None]:
# Stack the per-file trigger tables into one frame.
dft = pd.concat(dfts, ignore_index=True, sort=False)

# Order by event identity, then by file index.  drop=True discards the pre-sort
# row positions instead of storing them in a stray "index" column.
dft = dft.sort_values(["run", "subrun", "event", "ifile"]).reset_index(drop=True)

# Identify duplicate files: for every (run, subrun, event) record the first
# ifile in the sorted order (i.e. the smallest one) as "goodifile" on each row.
goodfiles = dft.groupby(["run", "subrun", "event"]).ifile.first()
goodfiles.name = "goodifile"
dft = dft.join(goodfiles, on=["run", "subrun", "event"])

In [None]:
# A row is a duplicate when its file is not the first file seen for its event.
dft["duplicate"] = dft["goodifile"] != dft["ifile"]

In [None]:
# Rows with a non-zero trigger_type are also bad; they only show up in Run 2,
# so the extra flag is applied only when not processing Run 1.
if not RUN1:
    dft["duplicate"] = dft["duplicate"] | (dft["trigger_type"] != 0)

In [None]:
# Re-index the triggers by (run, event) and display whether that pair is unique
# across rows.  Rows flagged as duplicates have not been removed from dft here.
dft_evtidx = dft.set_index(["run", "event"])
dft_evtidx.index.is_unique

In [None]:
# File indices that appear on at least one row flagged as duplicate
dup_ifiles = dft.loc[dft.duplicate, "ifile"].unique()
duplicate_files = [files[idx] for idx in dup_ifiles]

# Every listed file that is not one of the flagged files (order is arbitrary)
goodfiles = list(set(files).difference(set(duplicate_files)))

In [None]:
# Display the names of the files that carry flagged rows.
duplicate_files

In [None]:
# Disabled by the trailing `and False`.  When enabled, this writes one
# "*_deduped.list" per input file list (path relative to the working directory)
# containing the good files that belong to that list.
if dosave and False:
    for flist, listed in zip(filelists, files_per_list):
        with open(flist.replace(".list", "_deduped.list"), "w") as f:
            f.writelines(fname + "\n" for fname in goodfiles if fname in listed)

In [None]:
# Scaled copies of the two trigger timestamps (raw value / 1e9); presumably
# nanoseconds -> seconds, as the "_s" suffix suggests -- TODO confirm units.
dft["trigger_timestamp_s"] = dft["trigger_timestamp"] / 1_000_000_000
dft["prev_trigger_timestamp_s"] = dft["prev_trigger_timestamp"] / 1_000_000_000

In [None]:
# Trigger-related columns shown for inspection in the next cell
trigger_cols = [
    'run',
    'beam_type',
    'source_type',
    'trigger_type',
    'last_source_Type',
    'prev_trigger_timestamp',
    'trigger_timestamp',
    'beam_gate_timestamp',
    'trigger_id',
    'gate_id',
    'trigger_count',
    'gate_count',
    'trigger_delta',
    'gate_delta',
    'gate_all_delta',
]

In [None]:
# Display the trigger-related columns of the merged trigger table.
dft[trigger_cols]

In [None]:
# Per-spill beam information for the selected run period
beamdf = pd.read_hdf(datadir + beamfile, key="beam")

# clear beam for offbeam
if OFFBEAM:
    # Keep the columns but no rows.  Take an explicit copy: later cells add
    # columns to beamdf, and assigning into a slice of the loaded frame would be
    # a chained assignment (SettingWithCopyWarning).
    beamdf = beamdf.iloc[0:0].copy()

In [None]:
# Display the beam table (empty when OFFBEAM is set).
beamdf

In [None]:
# Distributions

In [None]:
# Spill POT distribution; the red line marks the value used by the POT cut.
_ = plt.hist(beamdf.pot, bins=np.linspace(-1, 70, 101))
plt.axvline(2, color="red")
plt.xlabel("POT [$\\times 10^{12}$]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_pot.pdf")
    plt.savefig(savedir + "spill_pot.svg")

In [None]:
# Horn current distribution; red lines mark the two edges of the horn-current cut.
# NOTE(review): the axis label says mA; confirm the unit of horn_current
# (values around -200 look more like kA) before publishing the figure.
_ = plt.hist(beamdf.horn_current, bins=np.linspace(-203, -195, 101))
plt.axvline(-202, color="red")
plt.axvline(-196.4, color="red")
plt.xlabel("Horn Current [mA]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_horncurrent.pdf")
    plt.savefig(savedir + "spill_horncurrent.svg")

In [None]:
# Horizontal beam position (extrap_hptgt); red lines mark the |x| < 1 cut edges.
_ = plt.hist(beamdf.extrap_hptgt, bins=np.linspace(-2, 2, 101))
plt.axvline(1, color="red")
plt.axvline(-1, color="red")
plt.xlabel("Beam Position X [mm]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_beamposx.pdf")
    plt.savefig(savedir + "spill_beamposx.svg")

In [None]:
# Vertical beam position (extrap_vptgt); red lines mark the |y| < 1 cut edges.
_ = plt.hist(beamdf.extrap_vptgt, bins=np.linspace(-2, 2, 101))
plt.axvline(1, color="red")
plt.axvline(-1, color="red")
plt.xlabel("Beam Position Y [mm]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_beamposy.pdf")
    plt.savefig(savedir + "spill_beamposy.svg")

In [None]:
# Horizontal beam width; red lines mark the two edges of the beam-width cut.
_ = plt.hist(beamdf.beam_width_h, bins=np.linspace(0.5, 4, 101))
plt.axvline(0.57, color="red")
plt.axvline(1.88, color="red")
plt.xlabel("Horizontal Beam Width [mm]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_beamwidthh.pdf")
    plt.savefig(savedir + "spill_beamwidthh.svg")

In [None]:
# Vertical beam width; red lines mark the two edges of the beam-width cut.
_ = plt.hist(beamdf.beam_width_v, bins=np.linspace(0.5, 4, 101))
plt.axvline(0.57, color="red")
plt.axvline(1.88, color="red")
plt.xlabel("Vertical Beam Width [mm]")
plt.ylabel("Spills")
# tight_layout only fits what is already on the figure, so call it after both
# axis labels are set and before the figure is saved.
plt.tight_layout()
if dosave and not OFFBEAM:
    plt.savefig(savedir + "spill_beamwidthv.pdf")
    plt.savefig(savedir + "spill_beamwidthv.svg")

In [None]:
# Per-spill beam-quality selections (thresholds match the red lines drawn on
# the distributions above).
pot_cut = beamdf["pot"] > 2

# Horn current in (-202, -196.4]
hc_cut = (beamdf["horn_current"] > -202) & (beamdf["horn_current"] <= -196.4)

# Beam position: both extrapolated coordinates strictly within +-1
bp_cut = (np.abs(beamdf["extrap_hptgt"]) < 1) & (np.abs(beamdf["extrap_vptgt"]) < 1)

# Beam width: both projections in (0.57, 1.88]
bw_cut = (
    (beamdf["beam_width_h"] > 0.57) & (beamdf["beam_width_h"] <= 1.88)
    & (beamdf["beam_width_v"] > 0.57) & (beamdf["beam_width_v"] <= 1.88)
)

# A spill is good only if it passes every selection
bq_cut = pot_cut & hc_cut & bp_cut & bw_cut

# POT of the spill multiplied by the pass flag (so zero for failing spills)
beamdf["goodpot"] = beamdf["pot"] * bq_cut
beamdf["badspill"] = ~bq_cut

In [None]:
# Summed POT of the spills passing the POT cut alone, divided by 1e8
beamdf.loc[pot_cut, "pot"].sum() / 1e8

In [None]:
# Summed POT of the spills passing all beam-quality cuts, divided by 1e8
beamdf.loc[bq_cut, "pot"].sum() / 1e8

In [None]:
# "Fixed" previous-trigger time: each row's trigger time minus the step from
# the preceding row of dft, i.e. derived from the row order rather than from
# the recorded prev_trigger_timestamp.  The first row has no predecessor and
# comes out as NaN here.
trig_time_s = dft["trigger_timestamp_s"]
dft["prev_trigger_timestamp_s_fixed"] = trig_time_s - trig_time_s.diff()

In [None]:
# Copy of the trigger time that pairs with the "fixed" previous-trigger time
dft["trigger_timestamp_s_fixed"] = dft["trigger_timestamp_s"]

# Row 0 has no previous trigger: zero both ends of its window (instead of NaN)
for fixed_col in ("trigger_timestamp_s_fixed", "prev_trigger_timestamp_s_fixed"):
    dft.loc[0, fixed_col] = 0  # np.nan

In [None]:
# Merge Triggers into POT DF
#
# Build one time window per non-duplicate trigger, from the previous trigger
# time to this trigger time, both shifted by +0.5.  IntervalIndex.from_arrays
# closes the windows on the right by default.
notdup = ~dft.duplicate

if RUN1:
    # RUN 1 STRATEGY: use the row-order derived ("_fixed") timestamps
    window_start = dft.prev_trigger_timestamp_s_fixed
    window_end = dft.trigger_timestamp_s_fixed
else:
    # RUN 2 STRATEGY: use the recorded timestamps
    window_start = dft.prev_trigger_timestamp_s
    window_end = dft.trigger_timestamp_s

ranges = pd.IntervalIndex.from_arrays(window_start[notdup] + 0.5,
                                      window_end[notdup] + 0.5)

In [None]:
# Position of the trigger window containing each spill time; get_indexer
# reports -1 for spills that fall in no window.
inds = ranges.get_indexer(beamdf["time"])

In [None]:
# Event identifiers copied from the matched trigger onto every spill
cols = ["run", "subrun", "event", "ifile"]

if OFFBEAM:
    # beamdf was emptied for off-beam running: just create the (empty) columns
    for c in cols:
        beamdf[c] = []
else:
    # Rows of the non-duplicate triggers, picked in spill order.  Unmatched
    # spills (index -1) pick up the last trigger here and are zeroed below.
    matched = dft.loc[~dft.duplicate, cols].iloc[inds]
    unmatched = inds < 0
    for c in cols:
        beamdf[c] = matched[c].to_numpy()
        beamdf.loc[unmatched, c] = 0 # Invalid events

In [None]:
# Display the first 50 spills, now carrying the matched event identifiers.
beamdf[:50]

In [None]:
# Aggregate the spills matched to each trigger, keyed by (run, subrun, event, ifile)
spills_by_event = beamdf.groupby(["run", "subrun", "event", "ifile"])

# Summed good POT and number of spills per trigger
totpot = spills_by_event.goodpot.sum().rename("totpot")
nspill = spills_by_event.goodpot.size().rename("nspill")

# Bad if the spill matched to the event is bad: taken from the last spill row
# of each group (assumes beamdf rows are in time order -- TODO confirm)
badspill = spills_by_event.badspill.last().rename("badspill")

# Time of that same last spill row
spilltime = spills_by_event.time.last().rename("spilltime")

In [None]:
# Drop results of a previous run of this cell so the joins below do not collide
for stale in ("totpot", "nspill", "badspill", "spilltime"):
    if stale in dft.columns:
        del dft[stale]

# Attach the per-trigger spill aggregates to the trigger table
for per_event in (totpot, nspill, badspill, spilltime):
    dft = dft.join(per_event, on=["run", "subrun", "event", "ifile"])

# Triggers without any matched spill: no POT, no spills, flagged bad, time 0
dft["totpot"] = dft["totpot"].fillna(0.).astype(float)
dft["nspill"] = dft["nspill"].fillna(0).astype(int)
dft["badspill"] = dft["badspill"].fillna(True).astype(bool)
dft["spilltime"] = dft["spilltime"].fillna(0).astype(float)

In [None]:
# Check merging validity -- no trigger timestamp may precede its matched spill
# time by 0.5 or more
trigger_minus_spill = dft.trigger_timestamp_s - dft.spilltime
assert trigger_minus_spill.min() > -0.5

In [None]:
# A trigger is kept only if it passes every condition below.  The two
# spill-related conditions are waived when the beam is OFF.

# A spill is associated with the trigger (spill within 0.5 of the trigger time)
has_spill = ((dft.trigger_timestamp_s - dft.spilltime) < 0.5) | OFFBEAM

# No miscount between the trigger's gate delta and the matched spills
spill_count_ok = (dft.gate_delta == dft.nspill) | OFFBEAM

# Don't count first trigger in each run
not_first_trigger = dft.trigger_count != 1

trigger_quality_cut = has_spill & spill_count_ok & not_first_trigger

# In Run 1 OnBeam, cut events where the gate delta is miscounted,
# since we rely on the previous trigger to get the timestamps correct for POT accounting
if RUN1 and not OFFBEAM:
    gate_step_ok = dft.gate_count.diff() == dft.gate_delta
    trigger_quality_cut = trigger_quality_cut & gate_step_ok

In [None]:
# Remove triggers with bad spills, unless the beam is OFF
spill_ok = ~dft.badspill | OFFBEAM
beam_quality_cut = trigger_quality_cut & spill_ok

In [None]:
# Fraction of the non-duplicate POT that survives the trigger quality cut
not_duplicate = ~dft.duplicate
dft.totpot[trigger_quality_cut & not_duplicate].sum() / dft.totpot[not_duplicate].sum()

In [None]:
# Per-trigger bookkeeping columns
dft["beam_quality"] = beam_quality_cut

# Minbias triggers are those whose gate count is a multiple of the prescale
dft["minbias"] = (dft["gate_count"] % MINBIAS_PRESCALE) == 0

# Livetime: 10.1 per gate counted since the previous trigger
# (presumably the gate length -- TODO confirm the unit)
dft["livetime"] = 10.1 * dft["gate_delta"]

dft["offbeam"] = OFFBEAM

In [None]:
# Running count of non-minbias rows: every non-minbias row starts a new group
# and the minbias rows that FOLLOW it (up to the next non-minbias row) share
# its group number.
# NOTE(review): confirm that attaching the following (rather than preceding)
# minbias rows is intended; the next cell takes `.last()` of each group.
dft["imajority"] = (~dft["minbias"]).cumsum()

In [None]:
# Per-group totals, keyed by the imajority group number
by_majority = dft.groupby("imajority")

# Group POT and livetime, scaled by (prescale - 1) / prescale
totpot_corr = (
    by_majority.totpot.sum() * (MINBIAS_PRESCALE - 1) / MINBIAS_PRESCALE
).rename("totpot_corr")
livetime_corr = (
    by_majority.livetime.sum() * (MINBIAS_PRESCALE - 1) / MINBIAS_PRESCALE
).rename("livetime_corr")

# Beam-quality flag of the last row in each group
quality_majority = by_majority.beam_quality.last().rename("quality_majority")

per_group = (totpot_corr, livetime_corr, quality_majority)

# Drop results of a previous run of this cell before joining again
for agg in per_group:
    if agg.name in dft.columns:
        del dft[agg.name]

for agg in per_group:
    dft = dft.join(agg, on="imajority")

# Minbias rows carry no corrected exposure and never count as quality majority
for col, blank in (
    ("totpot_corr", 0.),
    ("livetime_corr", 0.),
    ("quality_majority", False),
):
    dft.loc[dft.minbias, col] = blank

# Don't count POT OR livetime included in events with bad triggers
bad_trigger = ~trigger_quality_cut
for col in ("totpot_corr", "livetime_corr"):
    dft.loc[bad_trigger, col] = 0

In [None]:
# Columns written to the output CSV, in output order
cols_tosave = [
    # event identity
    "run", "subrun", "event", "ifile", "duplicate",
    # gate / spill counting
    "gate_count", "gate_delta", "nspill",
    # timing
    "prev_trigger_timestamp_s", "trigger_timestamp_s",
    # spill matching and quality flags
    "badspill", "spilltime", "beam_quality", "quality_majority",
    "minbias", "offbeam",
    # exposure
    "totpot", "livetime", "totpot_corr", "livetime_corr",
]

In [None]:
# Display the first 50 rows of the columns that will be saved.
dft[cols_tosave][:50]

In [None]:
# Write the selected columns (plus the row index) to the output CSV
if dosave:
    table_out = dft.loc[:, cols_tosave]
    table_out.to_csv(outf)

In [None]:
# Display the output CSV path.
outf