In [None]:
# Reload imported modules automatically before each cell executes, so edits
# to local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the
# last one (this is why plotting cells end with `plt.show();`).
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import warnings

import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
from matplotlib import collections as mc
# from util import *

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Utils

In [None]:
# Particle masses. The values are consistent with GeV units -- TODO confirm.
MASS_MUON = 0.105658
MASS_NEUTRON = 0.9395654
MASS_PROTON = 0.938272
# Target nucleus mass: 22 neutrons + 18 protons minus 0.34381
# (presumably the Ar-40 nuclear binding energy -- TODO confirm).
MASS_A = 22*MASS_NEUTRON + 18*MASS_PROTON - 0.34381
# NOTE(review): BE looks like a removal/excitation energy applied to the
# residual nucleus -- confirm the source of this value.
BE = 0.0295
# Residual nucleus mass: target minus one neutron, shifted by BE.
MASS_Ap = MASS_A - MASS_NEUTRON + BE

In [None]:
def mag2d(x, y):
    """Return sqrt(x**2 + y**2), element-wise when given array-likes."""
    sum_of_squares = x**2 + y**2
    return np.sqrt(sum_of_squares)

In [None]:
def issignal(df):
    """Mask of rows that are CC with exactly one muon and exactly one proton.

    Reads the `iscc`, `nmu` and `np` attributes of `df`.
    """
    # Variant with a fiducial-volume requirement, kept for reference:
    # return InFV(df.position, 50) & (df.iscc) & (df.nmu == 1) & (df.np == 1)
    exactly_one_muon = df.nmu == 1
    exactly_one_proton = df.np == 1
    return (df.iscc) & exactly_one_muon & exactly_one_proton

In [None]:
def InFV(data): # cm
    """Mask of points strictly inside the fiducial volume.

    `data` must expose `x`, `y` and `z`. The volume is the box
    x in [-199.15, 199.15], y in [-200, 200], z in [0, 500], inset by 10 on
    every face except the high-z face, which is inset by 50.
    """
    x_lo, x_hi = -199.15 + 10, 199.15 - 10
    y_lo, y_hi = -200. + 10, 200. - 10
    z_lo, z_hi = 0.0 + 10, 500. - 50

    inside_x = (data.x > x_lo) & (data.x < x_hi)
    inside_y = (data.y > y_lo) & (data.y < y_hi)
    inside_z = (data.z > z_lo) & (data.z < z_hi)
    return inside_x & inside_y & inside_z

def InBeam(t):
    """Mask of times strictly between 0 and 1.8 (both ends exclusive)."""
    after_window_start = t > 0.
    before_window_end = t < 1.800
    return after_window_start & before_window_end

In [None]:
def is_cosmic(df):
    """Mask of rows whose slice truth pdg is -1 (treated here as cosmic)."""
    truth_pdg = df.slc.truth.pdg
    return truth_pdg == -1

def is_FV(df): 
    """Mask of rows whose `position` lies inside the fiducial volume (see InFV)."""
    position = df.position
    return InFV(position)

def is_numu(df):
    """Mask of rows with |pdg| == 14 (muon neutrino or antineutrino)."""
    abs_pdg = np.abs(df.pdg)
    return abs_pdg == 14

def is_CC(df):
    """Mask of rows whose `iscc` flag equals 1."""
    cc_flag = df.iscc
    return cc_flag == 1

def is_NC(df):
    """Mask of rows whose `iscc` flag equals 0."""
    cc_flag = df.iscc
    return cc_flag == 0

def is_1p0pi(df):
    """Mask of rows with one muon, one proton and no pions.

    Requires nmu_20MeV == 1, np_50MeV == 1, npi_40MeV == 0 and npi0 == 0.
    The column names suggest per-particle energy thresholds -- TODO confirm.
    """
    one_muon = df.nmu_20MeV == 1
    one_proton = df.np_50MeV == 1
    no_charged_pion = df.npi_40MeV == 0
    no_neutral_pion = df.npi0 == 0
    return one_muon & one_proton & no_charged_pion & no_neutral_pion

def is_signal(df):
    """Mask of signal rows: numu, CC, 1mu1p0pi topology, inside the FV."""
    numu_cc = is_numu(df) & is_CC(df)
    topology = is_1p0pi(df)
    inside_fv = is_FV(df)
    return numu_cc & topology & inside_fv

def is_outFV(df):
    """Mask of numu CC 1mu1p0pi rows that are NOT inside the FV."""
    numu_cc = is_numu(df) & is_CC(df)
    topology = is_1p0pi(df)
    outside_fv = np.invert(is_FV(df))
    return numu_cc & topology & outside_fv

def is_othernumuCC(df):
    """Mask of numu CC rows inside the FV that fail the 1mu1p0pi topology."""
    numu_cc = is_numu(df) & is_CC(df)
    other_topology = np.invert(is_1p0pi(df))
    inside_fv = is_FV(df)
    return numu_cc & other_topology & inside_fv

# Plotters

In [None]:
# genie_mode codes, their legend labels and colours, matched by position.
# The final label/colour ("other") has no code in mode_list: it is the
# catch-all for every row whose genie_mode is not listed.
mode_list = [0, 10, 1, 2, 3]
mode_labels = ['QE', 'MEC', 'RES', 'SIS/DIS', 'COH', "other"]
mode_colors = ["darkorchid", "royalblue", "forestgreen", "darkorange", "firebrick", "gray"]

def breakdown_mode(var, df):
    """Split `var` by interaction mode.

    Parameters
    ----------
    var : indexable by a boolean mask (e.g. a pandas Series)
        Quantity to split; must be aligned with `df`.
    df : object exposing a `genie_mode` attribute
        Source of the mode code for every row.

    Returns
    -------
    list
        One selection of `var` per entry of `mode_list`, followed by one
        selection for rows matching none of them, so the result has the same
        length as `mode_labels` and `mode_colors`.
    """
    masks = [df.genie_mode == mode for mode in mode_list]

    # OR of all listed modes; its complement is the "other" category.
    listed = masks[0]
    for mask in masks[1:]:
        listed = listed | mask

    ret = [var[mask] for mask in masks]
    ret.append(var[np.invert(listed)])
    return ret


In [None]:
# Legend labels for the categories returned by breakdown_top, in order.
top_labels = [
    "Signal",
    "Other numu CC",
    "NC",
    "Out of FV",
    "Cosmic",
    "Other",
]

top_colors = []
def breakdown_top(var, df):
    """Split `var` into the categories named in `top_labels`.

    Returns a list of six selections of `var`: signal, other numu CC, NC,
    out-of-FV, cosmic, and finally everything matching none of those five.
    """
    signal = is_signal(df)
    other_numu_cc = is_othernumuCC(df)
    nc = is_NC(df)
    out_of_fv = is_outFV(df)
    cosmic = is_cosmic(df)

    categorised = signal | other_numu_cc | nc | out_of_fv | cosmic
    masks = [signal, other_numu_cc, nc, out_of_fv, cosmic, np.invert(categorised)]
    return [var[mask] for mask in masks]

# Selection

In [None]:
# Load the "evt" table of the input HDF file.
# df = pd.read_hdf("/exp/sbnd/data/users/munjung/osc/sbnd.df", "evt")
input_path = "/home/munjung/osc/sync/sbnana/sbnana/SBNAna/osc-villiage/test.df"
df = pd.read_hdf(input_path, key="evt")

In [None]:
# Keep only rows whose reconstructed slice vertex is inside the fiducial volume.

vertex_in_fv = InFV(df.slc.vertex)
df = df[vertex_in_fv]

In [None]:
# Cosmic rejection: keep slices whose nu_score is above 0.5.

# Diagnostic plot of nu_score for cosmic vs. neutrino slices (disabled):
# var = [df.slc.nu_score[is_cosmic(df)],
#        df.slc.nu_score[np.invert(is_cosmic(df))]]
# plt.hist(var, bins=21, label=["Cosmic", "Nu"], histtype="step", density=True)
# plt.legend()
# plt.show();

# Traditional 
slice_nu_score = df.slc.nu_score
nu_score = slice_nu_score > 0.5
# Flash-match requirement, currently disabled:
# f_match = (df.slc.fmatch.score < 7.0) & (InBeam(df.slc.fmatch.time))
cosmic_rejection = nu_score #& f_match

# CRUMBS alternative, currently disabled:
# crumbs = (df.slc_crumbs_result.score > 0)
# cosmic_rejection = (crumbs)

df = df[cosmic_rejection]

In [None]:
# Muon-candidate momentum after the FV and cosmic-rejection cuts, stacked by
# truth category.
var = df.mu.pfp.trk.P.p_muon
pvar = breakdown_top(var, df)
momentum_edges = np.linspace(0, 2, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=momentum_edges)
# With stacked=True the returned counts are cumulative over the datasets, so
# n[0] holds the signal and n[-1] the total of all categories.
signal_count = n[0].sum()
total_count = n[-1].sum()
print("signal purity {:.2f} %".format(100*signal_count/total_count))
plt.legend()
plt.show();

## Select 1mu1p topology

In [None]:
# Reject slices with any shower or third track: both "other" lengths must be
# NaN, i.e. no such object was found.
no_other_shower = np.isnan(df.other_shw_length)
no_other_track = np.isnan(df.other_trk_length)
twoprong_cut = no_other_shower & no_other_track

df = df[twoprong_cut]

In [None]:
# Muon-candidate momentum after the two-prong cut, stacked by truth category.
var = df.mu.pfp.trk.P.p_muon
pvar = breakdown_top(var, df)
momentum_edges = np.linspace(0, 2, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=momentum_edges)
# Stacked counts are cumulative: n[0] is the signal, n[-1] the grand total.
signal_count = n[0].sum()
total_count = n[-1].sum()
print("signal purity {:.2f} %".format(100*signal_count/total_count))
plt.legend()
plt.show();

In [None]:
# muon cut on muon candidates
# Thresholds: chi2_muon must be below MUSEL_MUSCORE_TH, chi2_proton above
# MUSEL_PSCORE_TH, and the track length above MUSEL_LEN_TH.
MUSEL_MUSCORE_TH = 25
MUSEL_PSCORE_TH = 100
MUSEL_LEN_TH = 50

# TODO: use scores of all 3 planes
# muon_chi2 = (Avg(df, "muon", drop_0=True) < MUSEL_MUSCORE_TH) & (Avg(df, "proton", drop_0=True) > MUSEL_PSCORE_TH)

# TODO: used BDT scores
# len_cut = (masterdf.len.squeeze() > MUSEL_LEN_TH)
# dazzle_muon = (masterdf.dazzle.muonScore > 0.6)
# muon_cut = (muon_chi2) & (len_cut | dazzle_muon)

# Both chi2 scores are read from the muon candidate (`df.mu`), matching the
# length cut below. The chi2_proton term previously read `df.pfp...`, i.e. it
# was not tied to the muon candidate like the other two requirements.
# NOTE(review): confirm `df.mu.pfp.trk.chi2pid.I2.chi2_proton` is the intended
# column; `df.pfp` is still used further down to build mudf / pdf.
mu_score_cut = (df.mu.pfp.trk.chi2pid.I2.chi2_muon < MUSEL_MUSCORE_TH) & (df.mu.pfp.trk.chi2pid.I2.chi2_proton > MUSEL_PSCORE_TH)
mu_len_cut = (df.mu.pfp.trk.len > MUSEL_LEN_TH)
mu_cut = (mu_score_cut) & (mu_len_cut)

# proton cut on proton candidates
# chi2_muon must exceed PSEL_MUSCORE_TH and chi2_proton be below PSEL_PSCORE_TH.
PSEL_MUSCORE_TH = 0
PSEL_PSCORE_TH = 90
p_score_cut = (df.p.pfp.trk.chi2pid.I2.chi2_muon > PSEL_MUSCORE_TH) & (df.p.pfp.trk.chi2pid.I2.chi2_proton < PSEL_PSCORE_TH) 
p_cut = p_score_cut

# select slices with mu+p
# A group (first three index levels) passes if any of its rows passes the cut.
slc_mu_cut = mu_cut.groupby(level=[0,1,2]).any()
slc_p_cut = p_cut.groupby(level=[0,1,2]).any()
df = df.loc[slc_mu_cut & slc_p_cut]

In [None]:
# Muon-candidate momentum after the muon/proton candidate cuts, stacked by
# truth category.
var = df.mu.pfp.trk.P.p_muon
pvar = breakdown_top(var, df)
momentum_edges = np.linspace(0, 2, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=momentum_edges)
# Stacked counts are cumulative: n[0] is the signal, n[-1] the grand total.
signal_count = n[0].sum()
total_count = n[-1].sum()
print("signal purity {:.2f} %".format(100*signal_count/total_count))
plt.legend()
plt.show();

In [None]:
# stub cut

# NOTE(review): `stubdf` is never created in this notebook, so this cell fails
# on a fresh kernel until the stub dataframe is loaded in an earlier cell.

binx = np.linspace(0, 5,11)
biny = np.linspace(0, 800000, 17)

# Segments of the staircase boundary drawn on top of each histogram, given as
# [(length, dQ/dx), (length, dQ/dx)] end points.
lines = [[(3, 3e5), (3, 1e6)], [(1.5, 3e5), (1.5, 4e5)], [(0.5, 4e5), (0.5, 5.5e5)], 
         [(1.5, 3e5), (3, 3e5)], [(0.5, 4e5), (1.5, 4e5)], [(0, 5.5e5), (0.5, 5.5e5)]]


def plot_stub_dqdx(when, charge, title):
    """Plot stub length vs. charge/length for the stubs selected by `when`.

    Parameters
    ----------
    when : boolean mask aligned with `stubdf`
        Stubs to include in the histogram.
    charge : column of `stubdf`
        Charge that is divided by `stubdf.length` to form the y axis.
    title : str
        Figure title.

    Returns
    -------
    (fig, ax) of the figure, which has already been shown.
    """
    fig, ax = plt.subplots()
    plt.hist2d(stubdf.length[when], (charge / stubdf.length)[when], bins=[binx, biny])
    plt.ticklabel_format(axis='y', style='sci', scilimits=(5,5))
    lc = mc.LineCollection(lines, linewidths=2, color="red", linestyle="--")
    ax.add_collection(lc)

    plt.xlabel("Length [cm]")
    plt.ylabel("dQ/dx [#elec/cm]")
    plt.title(title)
    # Every panel is shown explicitly so that no matplotlib object repr is
    # left behind as cell output.
    plt.show()
    return fig, ax


# True (anti)protons with genE - MASS_PROTON below 0.05.
when = (np.abs(stubdf.truth.p.pdg) == 2212) & (stubdf.truth.p.genE - MASS_PROTON < 0.05) #& (stubdf.nplane == 1)
fig, ax = plot_stub_dqdx(when, stubdf.inc_sub_charge, "Protons")

# Stubs matched to a non-proton truth particle with interaction_id > 0.
# NOTE(review): this panel uses `charge` while the proton panel uses
# `inc_sub_charge` -- confirm the difference is intentional.
when = (np.abs(stubdf.truth.p.pdg) != 2212) & (stubdf.truth.p.interaction_id > 0)
fig, ax = plot_stub_dqdx(when, stubdf.charge, "False Positive")

In [None]:
# Keep only slices that do not have a stub.

slice_has_stub = df.slc.has_stub
df = df[np.invert(slice_has_stub)]

In [None]:
# Muon-candidate momentum after the no-stub cut, stacked by truth category.
var = df.mu.pfp.trk.P.p_muon
pvar = breakdown_top(var, df)
momentum_edges = np.linspace(0, 2, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=momentum_edges)
# Stacked counts are cumulative: n[0] is the signal, n[-1] the grand total.
signal_count = n[0].sum()
total_count = n[-1].sum()
print("signal purity {:.2f} %".format(100*signal_count/total_count))
plt.legend()
plt.show();

In [None]:
# TODO: merge didn't overwrite nans -- expected?

# Build one-row-per-slice frames: the first row passing the muon cut, the
# first row passing the proton cut, and the first row of each slice. The last
# index level is dropped so that all three share the same index levels.
slice_levels = [0, 1]

mu_rows = df.loc[mu_cut]
mudf = mu_rows.groupby(level=slice_levels).head(1).pfp.trk
mudf.index = mudf.index.droplevel(-1)

p_rows = df.loc[p_cut]
pdf = p_rows.groupby(level=slice_levels).head(1).pfp.trk
pdf.index = pdf.index.droplevel(-1)

slcdf = df.groupby(level=slice_levels).head(1)
slcdf.index = slcdf.index.droplevel(-1)

In [None]:
# Calculate transverse kinematics
#
# z is used as the longitudinal direction; "transverse" means the x-y plane.

# Muon momentum components from its magnitude and direction cosines.
mu_p = mudf.P.p_muon
mu_p_x = mu_p * mudf.cos.x
mu_p_y = mu_p * mudf.cos.y
mu_p_z = mu_p * mudf.cos.z
# Unit vector along the muon's transverse momentum.
mu_phi_x = mu_p_x/mag2d(mu_p_x, mu_p_y)
mu_phi_y = mu_p_y/mag2d(mu_p_x, mu_p_y)

# Same for the proton.
p_p = pdf.P.p_proton
p_p_x = p_p * pdf.cos.x
p_p_y = p_p * pdf.cos.y
p_p_z = p_p * pdf.cos.z
p_phi_x = p_p_x/mag2d(p_p_x, p_p_y)
p_phi_y = p_p_y/mag2d(p_p_x, p_p_y)

# Rotate the transverse plane so that the muon's transverse momentum points
# along +y':
#     x' = phi_y * p_x - phi_x * p_y   (perpendicular to the muon)
#     y' = phi_x * p_x + phi_y * p_y   (along the muon)
# The `+` in y' is required for the map to be an orthogonal rotation; with a
# `-` it does not preserve lengths or angles, which corrupts every quantity
# derived below (mu_Tp, p_Tp, del_Tp, del_alpha, del_theta, del_p).
mu_Tp_x = mu_phi_y*mu_p_x - mu_phi_x*mu_p_y
mu_Tp_y = mu_phi_x*mu_p_x + mu_phi_y*mu_p_y
mu_Tp = mag2d(mu_Tp_x, mu_Tp_y)

p_Tp_x = mu_phi_y*p_p_x - mu_phi_x*p_p_y
p_Tp_y = mu_phi_x*p_p_x + mu_phi_y*p_p_y
p_Tp = mag2d(p_Tp_x, p_Tp_y)

# Transverse momentum imbalance: vector sum of the two transverse momenta.
del_Tp_x = mu_Tp_x + p_Tp_x
del_Tp_y = mu_Tp_y + p_Tp_y
del_Tp = mag2d(del_Tp_x, del_Tp_y)

# Angle between -mu_Tp and del_Tp, and between -mu_Tp and p_Tp.
del_alpha = np.arccos(-(mu_Tp_x*del_Tp_x + mu_Tp_y*del_Tp_y)/(mu_Tp*del_Tp))
del_theta = np.arccos(-(mu_Tp_x*p_Tp_x + mu_Tp_y*p_Tp_y)/(mu_Tp*p_Tp))

# Energies from E = sqrt(p^2 + m^2).
mu_E = mag2d(mu_p, MASS_MUON)
p_E = mag2d(p_p, MASS_PROTON)

# Longitudinal imbalance: del_Lp = R/2 - (MASS_Ap^2 + del_Tp^2) / (2 R),
# with R = MASS_A + p_L(mu) + p_L(p) - E(mu) - E(p).
R = MASS_A + mu_p_z + p_p_z - mu_E - p_E
del_Lp = 0.5*R - mag2d(MASS_Ap, del_Tp)**2/(2*R)
# Total imbalance from its transverse and longitudinal parts.
del_p = mag2d(del_Tp, del_Lp)

In [None]:
# Upper threshold on del_p: slices are kept when del_p < DELP_TH, and the
# threshold is drawn as a dashed vertical line on the del_p histograms.
# Same units as del_p (presumably GeV/c -- TODO confirm).
DELP_TH = 0.25

In [None]:
# del_p distribution stacked by interaction mode, with the cut value marked.
var = breakdown_mode(del_p, slcdf)
delp_edges = np.linspace(0, 1, 21)
n, bins, _ = plt.hist(var, color=mode_colors, label=mode_labels,
                      stacked=True, bins=delp_edges)
plt.axvline(DELP_TH, linestyle="--", color='k')
plt.legend()
plt.show();

In [None]:
# del_p distribution stacked by truth category, with the cut value marked.
var = del_p
pvar = breakdown_top(var, slcdf)
delp_edges = np.linspace(0, 1, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=delp_edges)
plt.axvline(DELP_TH, linestyle="--", color='k')
plt.legend()
plt.show();

In [None]:
# Momentum-imbalance cut: keep slices whose del_p is below DELP_TH.
passes_delp = del_p < DELP_TH
df = slcdf[passes_delp]

In [None]:
# Muon-candidate momentum after the del_p cut, stacked by truth category.
var = df.mu.pfp.trk.P.p_muon
pvar = breakdown_top(var, df)
momentum_edges = np.linspace(0, 2, 21)
n, bins, _ = plt.hist(pvar, label=top_labels, stacked=True, bins=momentum_edges)
# Stacked counts are cumulative: n[0] is the signal, n[-1] the grand total.
signal_count = n[0].sum()
total_count = n[-1].sum()
print("signal purity {:.2f} %".format(100*signal_count/total_count))
plt.legend()
plt.show();