In [1]:
%matplotlib inline

from ROOT import TFile,vector,TGraph
import ROOT
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats

Welcome to JupyROOT 6.18/04


In [2]:
def bless_scedr(row):
    """Distance between the reconstructed vertex and the ``sceshift_parent`` point.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``Xreco``, ``Yreco``, ``Zreco`` and
        ``sceshift_parentX``, ``sceshift_parentY``, ``sceshift_parentZ``.

    Returns
    -------
    The Euclidean norm of the three coordinate differences (``np.sqrt`` result).
    """
    delta_x = row['Xreco'] - row['sceshift_parentX']
    delta_y = row['Yreco'] - row['sceshift_parentY']
    delta_z = row['Zreco'] - row['sceshift_parentZ']
    return np.sqrt(delta_x*delta_x + delta_y*delta_y + delta_z*delta_z)

def bless_scedir(row):
    """Distance between the reconstructed vertex and the ``parentSCE`` point.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``Xreco``, ``Yreco``, ``Zreco`` and
        ``parentSCEX``, ``parentSCEY``, ``parentSCEZ``.

    Returns
    -------
    ``np.sqrt`` of the summed squared coordinate differences.
    """
    column_pairs = (
        ('Xreco', 'parentSCEX'),
        ('Yreco', 'parentSCEY'),
        ('Zreco', 'parentSCEZ'),
    )
    # Differences are taken in X, Y, Z order, reco minus parent.
    off_x, off_y, off_z = [row[reco] - row[parent] for reco, parent in column_pairs]
    squared_distance = off_x*off_x + off_y*off_y + off_z*off_z
    return np.sqrt(squared_distance)


def bless_ccqe_reweight_numu(row):
    """Per-event cross-section reweight for the numu sample.

    For rows whose ``interactionType`` equals 1001 the weight is the ratio of
    the two module-level graphs evaluated at ``row['energyInit']``:
    ``xsec_tune1_graph_numu`` over ``xsec_mcc9_graph_numu``.  Every other row
    gets a weight of 1.0.

    NOTE(review): 1001 is presumably the CCQE interaction code (going by the
    function name) -- confirm against the truth-tree convention.
    """
    if row['interactionType'] != 1001:
        return float(1)

    nu_energy = row['energyInit']
    return xsec_tune1_graph_numu.Eval(nu_energy) / xsec_mcc9_graph_numu.Eval(nu_energy)

def bless_ccqe_reweight_nue(row):
    """Per-event cross-section reweight for the nue sample.

    Rows with ``interactionType == 1001`` are weighted by
    ``xsec_tune1_graph_nue.Eval(E) / xsec_mcc9_graph_nue.Eval(E)`` with
    ``E = row['energyInit']``; all other rows keep a weight of 1.0.  Both
    graphs are module-level objects looked up at call time.

    NOTE(review): 1001 is presumably the CCQE interaction code (going by the
    function name) -- confirm against the truth-tree convention.
    """
    if row['interactionType']==1001:
        e_nu = row['energyInit']
        tuned_xsec = xsec_tune1_graph_nue.Eval(e_nu)
        nominal_xsec = xsec_mcc9_graph_nue.Eval(e_nu)
        return tuned_xsec / nominal_xsec
    return float(1)

def bless_leeweight(row):
    """Look up the weight of the energy bin containing ``row['energyInit']``.

    The table has ten weights and eleven bin edges.  The first bin whose upper
    edge is strictly greater than the energy supplies the weight, so an energy
    below the lowest edge also lands in the first bin.  Energies at or above
    the last edge (and NaN, for which every comparison is false) return 0.

    NOTE(review): the edges look like GeV values (0.2 ... 3.0) -- confirm that
    ``energyInit`` is stored in the same unit.
    """
    bin_weights = np.array((0.0, 6.3744, 5.64554, 3.73055, 1.50914, 1.074093, 0.754093, 0.476307, 0.152327, 0.0))
    bin_edges = np.array((0,.200,.250,.300,.350,.400,.450,.500,.600,.800,3.000))

    true_energy = row['energyInit']
    # Pair each weight with the upper edge of its bin.
    for upper_edge, weight in zip(bin_edges[1:], bin_weights):
        if true_energy < upper_edge:
            return weight

    return 0

In [3]:
# Time to load the good runs list
# good_df is joined onto every sample by 'run' further down; after those
# (left) joins, rows from runs not listed here carry NaN in 'good' and are
# removed by the "good==1" queries.
good_df = pd.read_csv('../data/goodruns_2020.txt')
good_df['good'] = 1

# load pmt precuts
# One precut dump per sample. Only the bnb and nue dumps are joined in the
# cells visible here; the dirt/ext/data cells select on a 'PassPMTPrecut'
# column instead.
df_pmtprecut_bnb = read_root('../data/mcc9_v13_bnb_overlay/pmt_precut_dump.root')
df_pmtprecut_nue = read_root('../data/mcc9_v13_nueintrinsic_overlay/pmt_precut_dump.root')
df_pmtprecut_ext = read_root('../data/mcc9jan_extbnb/pmt_precut_dump.root')
df_pmtprecut_data = read_root('../data/mcc9jan_bnb5e19/pmt_precut_dump.root')

# Key columns identifying a single event; used for the per-event joins.
RSE=['run','subrun','event']

In [4]:
# Cross-section graphs read by the bless_ccqe_reweight_* helpers, which take
# the ratio tune1 / mcc9 at the event's energyInit.
# The file objects are kept in module-level names alongside the graphs.
# NOTE(review): nothing checks that the files opened or that Get() found the
# requested keys -- confirm the paths/keys exist before applying the reweight.
xsec_mcc9_file = ROOT.TFile('../data/xsec_graphs_mcc9_broken.root')
xsec_mcc9_graph_numu = xsec_mcc9_file.Get('nu_mu_Ar40/qel_cc_n')
xsec_mcc9_graph_nue = xsec_mcc9_file.Get('nu_e_Ar40/qel_cc_n')
xsec_tune1_file = ROOT.TFile('../data/xsec_graphs_tune1.root')
xsec_tune1_graph_numu = xsec_tune1_file.Get('nu_mu_Ar40/qel_cc_n')
xsec_tune1_graph_nue = xsec_tune1_file.Get('nu_e_Ar40/qel_cc_n')

In [5]:
# Beam quality
# Joined onto the data sample by (run, subrun, event); its 'result' column is
# part of the data selection query.
beamq_df = read_root('../data/beamdataquality_hist_bnb5e19.root','beamdataquality/bdq')

In [6]:
# Label interpolated into the filename of every pickle this notebook writes.
tag = 'Jan21-Sparsenet-1m1p'

# MC BNB OVERLAY

In [7]:
# Load the BNB overlay selection output and the matching MC-truth tree.
df_bnb = read_root('../data/mcc9_v13_bnb_overlay/FVV-Prime-bnb_overlay_run1-sparsenet_NUMUONLY.root')
df_bnb_truth = read_root('../data/mcc9_v13_bnb_overlay/MCInfo.root','EventMCINFO_DL')

print('Loaded Files')

# merge dfs
# DataFrame.join defaults to a left join, so every df_bnb row is kept and gains:
#   - truth columns, matched on run/subrun/event
#   - the 'good' flag, matched on run (NaN when the run is not in good_df)
#   - PMT-precut columns, matched on run/subrun/event
df_bnb = df_bnb.join(df_bnb_truth.set_index(RSE),on=RSE)
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_pmtprecut_bnb.set_index(RSE),on=RSE)

# Successive selections: parentPDG of +-14, then good runs, then 'passed'.
# NOTE(review): 'passed' is presumably supplied by the PMT-precut dump joined
# above -- confirm the column's origin.
df_numu = df_bnb.query('parentPDG==14 or parentPDG==-14')
df_numu_goodruns = df_numu.query("good==1")
df_numu_goodruns_pmtprecut = df_numu_goodruns.query("passed==1")

print('Merged')

# Row-wise derived columns; insert(0, ...) places each new column first.
df_numu_goodruns_pmtprecut.insert(0,'scedir_corrected',df_numu_goodruns_pmtprecut.apply(bless_scedir,axis=1))

print('Scedir Corrected')

df_numu_goodruns_pmtprecut.insert(0,'geniereweight',df_numu_goodruns_pmtprecut.apply(bless_ccqe_reweight_numu,axis=1))

print('Genie Reweighted')

Loaded Files
Merged
Scedir Corrected
Genie Reweighted


In [8]:
# Persist the fully selected numu frame, named with the notebook-wide tag.
df_numu_goodruns_pmtprecut.to_pickle('../data/pickles/numu_goodruns_pmtprecut_%s.pkl'%tag)

print('Saved Pickle')

# Drop the notebook's references to the large frames.
# df_numu_goodruns is not deleted here; the POT cell further down queries it.
del df_bnb,df_numu,df_numu_goodruns_pmtprecut

print('Cleaned up')

Saved Pickle
Cleaned up


# DIRT

In [9]:
# Load the dirt overlay selection output.
df_dirt = read_root('../data/mcc9_v13_overlay_dirt_run1/FVV_mcc9_dirt_NUMU.root')

print('Loaded Files')

# Attach the 'good' flag by run (left join: unlisted runs get NaN).
df_dirt = df_dirt.join(good_df.set_index('run'),on='run')

# Good runs first, then the PMT precut. Unlike the BNB overlay cell, this
# selects on a 'PassPMTPrecut' column and joins no separate precut dump.
df_dirt_goodruns = df_dirt.query("good==1")
df_dirt_goodruns_pmtprecut = df_dirt_goodruns.query("PassPMTPrecut==1")

print('Merged')

Loaded Files
Merged


In [10]:
# Persist the selected dirt frame, named with the notebook-wide tag.
df_dirt_goodruns_pmtprecut.to_pickle('../data/pickles/dirt_goodruns_pmtprecut_%s.pkl'%tag)

print('Saved Pickle')

# Drop the notebook's references to all three dirt frames.
del df_dirt, df_dirt_goodruns,df_dirt_goodruns_pmtprecut

print('Cleaned up')

Saved Pickle
Cleaned up


# MC NUE OVERLAY

In [11]:
# Load the intrinsic-nue overlay selection output and the matching MC-truth tree.
df_nue = read_root('../data/mcc9_v13_nueintrinsic_overlay/FVV-Prime-nueintrinsic_overlay_run1-sparsenet_NUMUONLY.root')
df_nue_truth = read_root('../data/mcc9_v13_nueintrinsic_overlay/mc_info_mcc9_v13_nueintrinsics_overlay_run1.root','EventMCINFO_DL')

print('Loaded Files')

# Left joins: truth columns (run/subrun/event), the 'good' flag (run),
# then PMT-precut columns (run/subrun/event).
df_nue = df_nue.join(df_nue_truth.set_index(RSE),on=RSE)
df_nue = df_nue.join(good_df.set_index('run'),on='run')
df_nue = df_nue.join(df_pmtprecut_nue.set_index(RSE),on=RSE)


# Good runs first, then the 'passed' precut flag.
# NOTE(review): 'passed' is presumably supplied by the PMT-precut dump joined
# above -- confirm the column's origin.
df_nue_goodruns = df_nue.query("good==1")
df_nue_goodruns_pmtprecut = df_nue_goodruns.query("passed==1")

print('Merged')

# Row-wise derived columns; insert(0, ...) places each new column first.
df_nue_goodruns_pmtprecut.insert(0,'scedir_corrected',df_nue_goodruns_pmtprecut.apply(bless_scedir,axis=1))

# Message matches the column added above (it previously read 'Scedr Corrected').
print('Scedir Corrected')

df_nue_goodruns_pmtprecut.insert(0,'geniereweight',df_nue_goodruns_pmtprecut.apply(bless_ccqe_reweight_nue,axis=1))

print ('Genie Reweighted')

Loaded Files
Merged
Scedr Corrected
Genie Reweighted


In [12]:
# Persist the selected nue frame, named with the notebook-wide tag.
df_nue_goodruns_pmtprecut.to_pickle('../data/pickles/nue_goodruns_pmtprecut_%s.pkl'%tag)

print('Saved Pickle')

# Drop the notebook's references to the large frames.
# NOTE(review): the intermediate df_nue_goodruns is not in this list, unlike
# the corresponding dirt/ext cleanup cells -- confirm whether that is intended.
del df_nue,df_nue_goodruns_pmtprecut,df_nue_truth

print('Cleaned up')

Saved Pickle
Cleaned up


# EXT

In [13]:
# Load the EXT (beam-off) selection output.
df_ext = read_root('../data/mcc9jan_extbnb/FVV-Prime-ext-sparsenet.root')

print('Loaded files')

# Attach the 'good' flag by run (left join: unlisted runs get NaN).
df_ext = df_ext.join(good_df.set_index('run'),on='run')

# Good runs first, then the PMT precut flag carried in the file itself.
df_ext_goodruns = df_ext.query("good==1")
df_ext_goodruns_pmtprecut = df_ext_goodruns.query("PassPMTPrecut==1")

print('Merged')

# restrict to 5e19 run range
# (currently disabled: the run-range restriction below is commented out)
#df_ext_goodruns_pmtprecut = df_ext_goodruns_pmtprecut.query('run>=5119 and run<=5955')

Loaded files
Merged


In [14]:
# Persist the selected EXT frame, named with the notebook-wide tag.
df_ext_goodruns_pmtprecut.to_pickle('../data/pickles/ext_goodruns_pmtprecut_%s.pkl'%tag)

print('Saved Pickle')

# Drop the notebook's references to all three EXT frames.
del df_ext,df_ext_goodruns,df_ext_goodruns_pmtprecut

print('Cleaned up')

Saved Pickle
Cleaned up


In [11]:
# Inspect which columns the beam-quality frame provides.
beamq_df.keys()

Index(['run', 'subrun', 'sec', 'msec', 'event', 'tor', 'horn', 'fom',
       'trigger', 'result'],
      dtype='object')

# Data

In [21]:
# Load the beam-on data selection output.
df_data = read_root('../data/mcc9jan_bnb5e19/FVV-Prime-data-sparsenet_NUMU.root')

print('Loaded files')

# Left joins: the 'good' flag by run, then the beam-quality columns by
# run/subrun/event.
df_data = df_data.join(good_df.set_index('run'),on='run')
df_data = df_data.join(beamq_df.set_index(['run','subrun','event']),on=['run','subrun','event'])

# Keep good runs whose beam-quality 'result' equals 1, then apply the PMT precut.
# NOTE(review): 'result' is presumably the beam-quality pass flag -- confirm.
df_data_goodruns = df_data.query("good==1 and result==1")
df_data_goodruns_pmtprecut = df_data_goodruns.query("PassPMTPrecut==1")

print('Merged')

Loaded files
245023
245023
245023
Merged


In [22]:
# Persist the selected data frame, named with the notebook-wide tag.
df_data_goodruns_pmtprecut.to_pickle('../data/pickles/data_goodruns_pmtprecut_%s.pkl'%tag)

print('Saved Pickle')

# Drop the notebook's references to the large frames. df_data_goodruns is
# deliberately kept: the run/subrun list cell further down reads it, and
# deleting it here makes that cell fail with a NameError on a top-to-bottom run.
del df_data,df_data_goodruns_pmtprecut

print('Cleaned up')

Saved Pickle
Cleaned up


In [18]:

# Row count of df_data.
# NOTE(review): the recorded output (2086141) differs from the 245023 rows
# printed further down, so this looks like a stale scratch cell -- confirm
# whether it is still needed.
len(df_data)

2086141

In [24]:
# Reload the raw beam-on data frame (no joins applied) for the checks below.
df_data = read_root('../data/mcc9jan_bnb5e19/FVV-Prime-data-sparsenet_NUMU.root')

In [39]:
# Baseline row count before any join, for comparison with the merge variants.
print(len(df_data))

245023


In [38]:
# Compare three ways of attaching the good-runs flag to df_data by printing
# the resulting row counts.

# (a) pd.merge with its default join type (inner: unmatched runs are dropped).
df_data_1 = pd.merge(df_data, good_df, on='run')
print(len(df_data_1))

# (b) index-on-index DataFrame.join (default left), then 'run' restored as a column.
df_data_1b = (
    df_data.set_index('run')
    .join(good_df.set_index('run'))
    .reset_index()
)
print(len(df_data_1b))

# (c) column-on-index DataFrame.join (default left) -- the form used by the
# sample-building cells.
df_data_1c = df_data.join(good_df.set_index('run'), on='run')
print(len(df_data_1c))

225888
245023
245023


In [None]:
# Count the df_data rows with PassCuts==1.
print(len(df_data.query('PassCuts==1')))



# NOTE(review): stray progress message -- nothing is merged in this cell.
print('Merged')

In [None]:
# The in-memory nue frame is deleted right after it is pickled, so on a
# top-to-bottom run this name no longer exists here. Reload the frame from
# the pickle this notebook wrote (a locally produced file) before adding the
# LEE weight.
df_nue_goodruns_pmtprecut = pd.read_pickle('../data/pickles/nue_goodruns_pmtprecut_%s.pkl'%tag)

# One weight per row from the energy-binned table in bless_leeweight; the new
# column is placed first.
# NOTE(review): the weighted frame is not written back to the pickle here --
# confirm whether 'leeweight' should be persisted.
df_nue_goodruns_pmtprecut.insert(0,'leeweight',df_nue_goodruns_pmtprecut.apply(bless_leeweight,axis=1))

# Get POT for mc

In [31]:
# Sum the POT of every (run, subrun) that contributes at least one numu event
# passing the good-run and 'passed' selections.
RS = ['run', 'subrun']

df_pot = read_root(
    '../data/mcc9_v13_bnb_overlay/pot_scrape.root',
    'pot_tree',
    columns=['run', 'subrun','pot'],
)

# Merge on run/subrun, then keep one row per pair so each subrun's POT is
# counted once.
df_pot = pd.merge(df_numu_goodruns.query('passed==1'), df_pot, on=RS).drop_duplicates(RS)

print(df_pot['pot'].values.sum())

1.219190611900899e+21


In [23]:
# Get list of RS
# Write one "run subrun" line per unique (run, subrun) pair in the good-run
# data selection.
RS=['run','subrun']
df_rs = df_data_goodruns.drop_duplicates(RS)

# The context manager closes the file even if a write raises. The column
# arrays are extracted once rather than on every iteration.
with open('../data/RS_data.txt','w') as f:
    for run, subrun in zip(df_rs['run'].values, df_rs['subrun'].values):
        f.write('%i %i\n' % (run, subrun))

In [None]:
def bless_scedr(row):
    """Distance between the reconstructed vertex and the ``parentSCE`` point.

    Reads ``Xreco``/``Yreco``/``Zreco`` and ``parentSCEX``/``parentSCEY``/
    ``parentSCEZ`` from ``row`` and returns ``np.sqrt`` of the summed squared
    differences.

    NOTE(review): this redefinition replaces the ``bless_scedr`` defined near
    the top of the notebook (which reads the ``sceshift_parent*`` columns) and
    computes the same quantity as ``bless_scedir`` -- confirm which variant is
    intended and remove the other.
    """
    x_off = row['Xreco'] - row['parentSCEX']
    y_off = row['Yreco'] - row['parentSCEY']
    z_off = row['Zreco'] - row['parentSCEZ']

    dist_sq = x_off*x_off + y_off*y_off + z_off*z_off
    return np.sqrt(dist_sq)


In [None]:
def bless_proton_dedx(row):
    """Return ``Proton_Edep`` divided by ``Proton_TrackLength``.

    The track length is cast to ``float`` before dividing.  No guard is applied
    for a zero track length.
    """
    deposited_energy = row['Proton_Edep']
    track_length = float(row['Proton_TrackLength'])
    return deposited_energy / track_length

def bless_proton_dedx_recombo(row):
    """Convert ``row['Proton_dQdx']`` to dE/dx with a fixed exponential formula.

    Computes ``(exp(dQdx * k * Wion) - alpha) / k`` with
    ``k = betap / (Rho * Efield)`` and the constants hard-coded below.

    NOTE(review): this looks like an inverted recombination correction;
    confirm the constants and the units expected for ``Proton_dQdx``.
    """
    rho = 1.383
    beta_prime = 0.183592
    alpha = 0.921969
    w_ion = 23.6e-6
    e_field = 0.273

    # Shared factor: scales the exponent and divides the final result.
    k = beta_prime / (rho * e_field)

    exponent = row['Proton_dQdx'] * k * w_ion
    return (np.exp(exponent) - alpha) / k

In [None]:
# restrict to 5e19 run range
#df_nue_goodruns_pmtprecut = df_nue_goodruns_pmtprecut.query('run>=5119 and run<=5955')