In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
#from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats
 
from datetime import date

datafolder = '../../rootFiles/1mu1p_selection'
auxfolder = '../aux'
dumpfolder = '../PlotDumps'

RSE  = ['run','subrun','event']
RSEV = ['run','subrun','event','vtxid']

#with open('/home/dcianci/Physics/1e1p/1mu1pSelection/bdtweight_series4_Sept16_run1.pickle','rb') as handle: bkgBDT_run1 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation     
#with open('/home/dcianci/Physics/1e1p/1mu1pSelection/bdtweight_series4_Sept16_run2.pickle','rb') as handle: bkgBDT_run2 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation  
#with open('/home/dcianci/Physics/1e1p/1mu1pSelection/bdtweight_series4_Sept16_run3.pickle','rb') as handle: bkgBDT_run3 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation  
    
#with open('/home/dcianci/Physics/1e1p/testzone/1L1PSelection/bdt_models/bdtweight_series2_june1_run1.pickle','rb') as handle: bkgBDT_run1 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation     
#with open('/home/dcianci/Physics/1e1p/testzone/1L1PSelection/bdt_models/bdtweight_series2_june1_run3.pickle','rb') as handle: bkgBDT_run3 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation  
    
vars_june1 = ['Phis','ChargeNearTrunk','Enu_1m1p','PhiT_1m1p','AlphaT_1m1p','PT_1m1p','PTRat_1m1p','BjXB_1m1p','BjYB_1m1p','SphB_1m1p','Q0_1m1p','Q3_1m1p','Lepton_PhiReco','Lepton_TrackLength','Proton_PhiReco','Proton_ThetaReco']
bdtVars = vars_june1

Welcome to JupyROOT 6.22/02


In [2]:
def bless_MC_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if not (row['MC_nproton']==1 and row['MC_nlepton']==1):
        return 'nLmP'
    elif not 0 < row['MC_scedr'] <= 5.0:
        return 'offvtx'
    elif not abs((row['MC_energyInit']-row['Enu_1m1p'])/row['MC_energyInit']) < 0.2:
        return 'badreco'    
    else:
        if row['nu_interaction_type'] == 1001:
            mclabel = 'CCQE'
        elif row['nu_interaction_type'] == 1000:
            mclabel = 'MEC'
        elif row['nu_interaction_type'] in pizero:
            mclabel = 'pizero'
        elif row['nu_interaction_type'] in piplusminus:
            mclabel = 'piplusminus' 
        else:
            mclabel = 'other'
    return '%s_%s'%(intlabel,mclabel)

def bless_int_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if row['nu_interaction_type'] == 1001:
        mclabel = 'CCQE'
    elif row['nu_interaction_type'] == 1000:
        mclabel = 'MEC'
    elif row['nu_interaction_type'] in pizero:
        mclabel = 'pizero'
    elif row['nu_interaction_type'] in piplusminus:
        mclabel = 'piplusminus' 
    else:
        mclabel = 'other'
           
    return '%s_%s'%(intlabel,mclabel)

In [3]:
tag = date.today()

# Tag dictionary:
# Oct21_cmt - all up to date as of the collaboration meeting in october. Identical selection to internal note, now with training samples marked

# All Precuts
orthogonalcut = 'MaxShrFrac < .2'
precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5'
pmtprecuts = 'TotPE > 20 and PorchTotPE < 20'
s_precut = orthogonalcut + ' and ' + precuts + ' and ' + pmtprecuts

In [4]:
def proc_df(df_dlana,df_wgts,df_goodrun,bdtwgts,isMC=True):
    
    # join with goodruns list and cv weights list
    df_full = df_dlana.join(df_goodrun.set_index('run'),on='run')
    if(isMC):
        df_full = df_full.join(df_wgts.set_index(RSE)[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg','nu_energy_true']],on=RSE)
    df_full_goodruns = df_full.query('good==1')   # keep good runs
    df_full_goodruns_precuts = df_full_goodruns.query(s_precut)    # apply precuts
    
    
    if(isMC):
        df_full_goodruns_precuts.insert(0,'mc_label',df_full_goodruns_precuts.apply(bless_MC_labels,axis=1))
        df_full_goodruns_precuts.insert(0,'int_label',df_full_goodruns_precuts.apply(bless_int_labels,axis=1))
       
    # add a bunch of helpful variables!
    df_full_goodruns_precuts.insert(0,'MPID_eminus',[ef.max() for ef in df_full_goodruns_precuts['EminusPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_muon',[ef.max() for ef in df_full_goodruns_precuts['MuonPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_proton',[ef.max() for ef in df_full_goodruns_precuts['ProtonPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_gamma',[ef.max() for ef in df_full_goodruns_precuts['GammaPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_pion',[ef.max() for ef in df_full_goodruns_precuts['PionPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'Lepton_CosTheta',np.cos(df_full_goodruns_precuts['Lepton_ThetaReco'].values).tolist())
    df_full_goodruns_precuts.insert(0,'Proton_CosTheta',np.cos(df_full_goodruns_precuts['Proton_ThetaReco'].values).tolist())
    
    # add most current bdt weights.
#    df_full_goodruns_precuts.insert(0,'bkgBDT_univ',bdtwgts.predict_proba(df_full_goodruns_precuts[bdtVars].values.tolist())[:,0])
    df_full_nodupes = df_full_goodruns_precuts.sort_values('BDTscore_1mu1p_nu',ascending=True).drop_duplicates(RSE).sort_index()
    
    return df_full_nodupes

def proc_df_fakedata(df_dlana,bdtwgts):
    
    df_full_precuts = df_dlana.query(s_precut)    # apply precuts
    
    # add a bunch of helpful variables!
    df_full_precuts.insert(0,'MPID_eminus',[ef.max() for ef in df_full_precuts['EminusPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_muon',[ef.max() for ef in df_full_precuts['MuonPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_proton',[ef.max() for ef in df_full_precuts['ProtonPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_gamma',[ef.max() for ef in df_full_precuts['GammaPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_pion',[ef.max() for ef in df_full_precuts['PionPID_int_v'].values])
    df_full_precuts.insert(0,'Lepton_CosTheta',np.cos(df_full_precuts['Lepton_ThetaReco'].values).tolist())
    df_full_precuts.insert(0,'Proton_CosTheta',np.cos(df_full_precuts['Proton_ThetaReco'].values).tolist())
    
    # add most current bdt weights.
#    df_full_precuts.insert(0,'bkgBDT_univ',bdtwgts.predict_proba(df_full_precuts[bdtVars].values.tolist())[:,0])
    df_full_nodupes = df_full_precuts.sort_values('BDTscore_1mu1p_nu',ascending=True).drop_duplicates(RSE).sort_index()
    
    return df_full_nodupes

In [5]:
# Time to load the good runs list
good_run1_df = pd.read_csv('%s/R1goodlist.txt'%auxfolder)
good_run2_df = pd.read_csv('%s/R2goodlist.txt'%auxfolder)
good_run3_df = pd.read_csv('%s/R3goodlist.txt'%auxfolder)

good_run1_df['good'] = 1
good_run2_df['good'] = 1
good_run3_df['good'] = 1
#print(good_run1_df)

# Training

In [6]:
# Are we in  the training sample?
a_train_run1 = np.genfromtxt('bdt_run1_trainsample.csv',delimiter=',')
a_train_run3 = np.genfromtxt('bdt_run3_trainsample.csv',delimiter=',')
dic_train_run1 = {}
dic_train_run3 = {}

for tr in a_train_run1:
    idx = tuple((tr[0],tr[1]))
    dic_train_run1[idx] = dict(intrain=1)

for tr in a_train_run3:
    idx = tuple((tr[0],tr[1]))
    dic_train_run3[idx] = dict(intrain=1)
    
def InTrainRun1(row):
    try:
        return dic_train_run1[row.run,row.subrun]['intrain']
    except:
        return 0
    
def InTrainRun3(row):
    try:
        return dic_train_run3[row.run,row.subrun]['intrain']
    except:
        return 0

In [7]:
# Beam quality
beamq_df = read_root('%s/beamdataquality_remix_bnb5e19.root'%datafolder,'bdq')

# MC BNB OVERLAY

##  Run1

In [6]:
s_mc = 'numu_run1'
df_mc = read_root('%s/mcc9_bnb_nu_overlay_CV_run1_500k_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_DetVar_run1.root'%auxfolder)

#df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun1,axis=1)) # If you are training

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,0,True) 
#df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes if using nue intrinsic
#print(list(df_mc_full))
#df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [10]:
s_mc = 'numu_lowe_run1'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_run1_bnb_nu_overlay_LowE.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_lowE_run1.root'%datafolder)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun1,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)
df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

##  Run 2

In [58]:
# s_mc = 'numu_run2'
# df_mc = read_root('%s/bnb_overlay/mcc9_v29e_dl_run2_bnb_nu_overlay_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
# df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v40_bnb_nu_run2.root'%datafolder)

# df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run2_df,bkgBDT_run3,True) 
# df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

# df_mc_full['InTraining'] = 0

# print('Total Verts',len(df_mc_full))
# print('mec',len(df_mc_full.query("mc_label in ['numu_MEC']")))
# print('piplusminus',len(df_mc_full.query("mc_label in ['numu_piplusminus']")))
# print('pizero',len(df_mc_full.query("mc_label in ['numu_pizero']")))
# print('numu CCQE',len(df_mc_full.query("mc_label in ['numu_CCQE']")))
# print('other',len(df_mc_full.query("mc_label in ['numu_other','BNB nue_other','nue_MEC','nue_piplusminus','nue_pizero']")))

# df_mc_full.query('abs(MC_energyInit-nu_energy_true) < .0003').to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

# #del df_mc, df_mc_cvweight, df_mc_full

Total Verts 35569
mec 209
piplusminus 229
pizero 87
numu CCQE 1963
other 159


## Run3

In [7]:
s_mc = 'numu_run3'
df_mc = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_CV_v1_1_3_fvv_numu.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_DetVar_run3.root'%auxfolder)

#df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun3,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,0,True) 
#df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes if using nu_e int

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [12]:
s_mc = 'numu_lowe_run3'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_run3b_bnb_nu_overlay_LowE.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_lowE_run3.root'%datafolder)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun3,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,bkgBDT_run3,True) 
df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

# MC NUE OVERLAY

In [13]:
s_mc = 'nue_run1'
df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v28_wctagger_nueintrinsics_stripped.root'%datafolder,'FinalVertexVariables')
df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v48_Sep24_intrinsic_nue_run1.root'%datafolder)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun1,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [56]:
# s_mc = 'nue_run2'
# df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v29e_dl_run2_bnb_intrinsics_nue_overlay_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
# df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v40_intrinsic_nue_run2.root'%datafolder)

# df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run2_df,bkgBDT_run3,True)

# df_mc_full['InTraining'] = 0

# #df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

# #del df_mc, df_mc_cvweight, df_mc_full

In [14]:
s_mc = 'nue_run3'
df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v29e_run3b_bnb_intrinsic_nue_overlay_nocrtremerge_stripped.root'%datafolder,'FinalVertexVariables')
df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v48_Sep24_intrinsic_nue_run3.root'%datafolder)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun3,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,bkgBDT_run3,True)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

# Dirt

In [15]:
# s_mc = 'dirt_run1'
# df_mc = read_root('%s/dirt/FVV-Prime-dirt-Mar3-WC-1M1P.root'%datafolder,'FinalVertexVariables')
# df_mc_cvweight = read_root('%s/dirt/weights_forCV_v40_dirt_nu_run1.root'%datafolder)

# df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)

# df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

# del df_mc, df_mc_cvweight, df_mc_full

# EXT

In [16]:
s_ext = 'ext_run1'
df_ext = read_root('%s/ext/mcc9_v28_wctagger_extbnbFULL_stripped.root'%datafolder)

df_ext.insert(0,'InTraining',df_ext.apply(InTrainRun1,axis=1))

df_ext_full = proc_df(df_ext,'',good_run1_df,bkgBDT_run1,False)

df_ext_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_ext,tag))

In [17]:
s_ext = 'ext_run3'
df_ext = read_root('%s/ext/mcc9_v29e_dl_run3_G1_extbnb_stripped.root'%datafolder)
    
df_ext.insert(0,'InTraining',df_ext.apply(InTrainRun3,axis=1))

df_ext_full = proc_df(df_ext,'',good_run3_df,bkgBDT_run3,False)

df_ext_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_ext,tag)) 

# Data

In [21]:
s_data = 'data_run1_5e19'
df_data = read_root('%s/bnb/mcc9_v28_wctagger_5e19.root'%datafolder,'dlana/FinalVertexVariables')

# add beam quality filter
df_data = df_data.join(beamq_df.set_index(['run','subrun','event']),on=['run','subrun','event'])
df_data_full = proc_df(df_data,'',good_run1_df,bkgBDT_run1,False)

df_data_full['InTraining'] = 0

df_data_full.query('result==1').to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [22]:
s_data = 'data_run3_1e19'
df_data = read_root('%s/bnb/mcc9_v28_wctagger_run3_bnb1e19.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run3_df,bkgBDT_run3,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [23]:
s_data = 'data_run2_filter'
df_data = read_root('%s/bnb/mcc9_v29e_run2_D2E1_1m1p_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run2_df,bkgBDT_run2,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [24]:
s_data = 'data_run3_filter'
df_data = read_root('%s/bnb/mcc9_v29e_run3_F1G1_1m1p_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run3_df,bkgBDT_run3,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [25]:
s_data = 'data_run1_filter'
df_data = read_root('%s/bnb/mcc9_v29e_dl_run1_C1_bnb_dlfilter_1m1p_v1_1_2b_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run1_df,bkgBDT_run1,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [10]:
s_data = 'fakedata_set1_run1'
df_data = read_root('%s/fakedata/dlfilter_fakedata_v08_00_00_29e_dl_ubdlana_v1_1_2_set1_run1_1m1p_stripped.root'%datafolder,'FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [11]:
s_data = 'fakedata_set1_run3b'
df_data = read_root('%s/fakedata/dlfilter_fakedata_v08_00_00_29e_dl_ubdlana_v1_1_2_set1_run3b_1m1p_stripped.root'%datafolder,'FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [13]:
s_data = 'fakedata_set3_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set3_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [14]:
s_data = 'fakedata_set3_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set3_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [7]:
s_data = 'fakedata_set2_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set2_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [8]:
s_data = 'fakedata_set2_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set2_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [20]:
s_data = 'fakedata_set4_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set4_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [21]:
s_data = 'fakedata_set4_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set4_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [22]:
s_data = 'fakedata_set5_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set5_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

# Detector Systematics (run3; not updated)

In [6]:
def proc_detvar_mc(df_dlana,df_cv,df_goodrun,df_wgts):
    
    df_cv['isCV'] = True
    df_dlana['isCV'] = False
    # Join cv and detsys together
    # ugh. okay. this phase takes a million years so let's do it smarter
    
    df3_big = pd.concat((df_cv,df_dlana))
    overlap = df_cv.merge(df_dlana[RSE],how='inner')[RSE].drop_duplicates()
    df_full = df3_big.merge(overlap,how='inner')
    df_full = df_full.join(df_wgts.set_index(RSE)[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg','nu_energy_true']],on=RSE)
    #df_full = pd.concat((df_dlana[df_dlana[RSE].isin(df_cv[RSE])],df_cv[df_cv[RSE].isin(df_dlana[RSE])])) 
    df_full_wGoodruns = df_full.join(df_goodrun.set_index('run'),on='run')
    df_full_goodruns = df_full_wGoodruns.query('good == 1 and Enu_1m1p > 0')
    df_full_goodruns.insert(0,'PassPrecuts1m1p',df_full_goodruns.apply(passPrecut,axis=1))     




    
#    df_full_goodruns = df_full_goodruns.query(s_precut)    # apply precuts
    
    
#    if(isMC):
#        df_full_goodruns.insert(0,'mc_label',df_full_goodruns.apply(bless_MC_labels,axis=1))
#        df_full_goodruns.insert(0,'int_label',df_full_goodruns.apply(bless_int_labels,axis=1))
       
    # add a bunch of helpful variables!
    df_full_goodruns.insert(0,'MPID_eminus',[ef.max() for ef in df_full_goodruns['EminusPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_muon',[ef.max() for ef in df_full_goodruns['MuonPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_proton',[ef.max() for ef in df_full_goodruns['ProtonPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_gamma',[ef.max() for ef in df_full_goodruns['GammaPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_pion',[ef.max() for ef in df_full_goodruns['PionPID_int_v'].values])
    df_full_goodruns.insert(0,'Lepton_CosTheta',np.cos(df_full_goodruns['Lepton_ThetaReco'].values).tolist())
    df_full_goodruns.insert(0,'Proton_CosTheta',np.cos(df_full_goodruns['Proton_ThetaReco'].values).tolist())
    
    # add most current bdt weights.
#    df_full_goodruns.insert(0,'bkgBDT_univ',bdtwgts.predict_proba(df_full_goodruns[bdtVars].values.tolist())[:,0])
    df_full_nodupes = df_full_goodruns.sort_values('BDTscore_1mu1p_nu',ascending=True).drop_duplicates(RSE).sort_index()
    return df_full_goodruns


def passPrecut(row):
    #orthogonalcut = 'MaxShrFrac < .2'
    #precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5 and '
    #s_precut = orthogonalcut + ' and ' + precuts
    return (row['PassSimpleCuts'] == 1 and row['ChargeNearTrunk'] > 0 and row['FailedBoost'] != 1 and row['OpenAng'] > .5 and row['MaxShrFrac'] < .2)

# example of one detvar


In [37]:

df_cv = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_CV_v1_1_3_fvv_numu.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_CV_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_cv_wPass = df_cv.join(df_passPi0.set_index(RSEV),on=RSEV)
print('done cv')
print(df_cv.shape[0])

s_data = 'wiremodYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModYZ_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
print(df_bnb.shape[0])

#df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_wiremodthetaXZ_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv,good_run3_df)
print(df_bnb_wCV_wPass_wProc.shape[0])
#df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

done cv
10638
22291
186
done wiremodYZ


In [6]:
print(tag)

2021-01-18


# all detvar with thier correspondance cv 


In [13]:
run = 'run3'

df_mc_cvweight_500 = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_DetVar_run3.root'%auxfolder)
df_mc_cvweight_1000 = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_run3.root'%auxfolder)
print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
print(df_mc_cvweight_1000)
df_cv_500 = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_CV_v1_1_3_fvv_numu.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_CV_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_cv = df_cv.join(df_passPi0.set_index(RSEV),on=RSEV)
print('done cv500')

df_cv_1000 = read_root('%s/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_CV_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_cv = df_cv.join(df_passPi0.set_index(RSEV),on=RSEV)
print('done cv1000')

s_data = 'wiremodThetaXZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaXZ_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_wiremodthetaXZ_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'wiremodThetaYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaYZ_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_wiremodthetaYZ_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModYZ_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_wiremodYZ_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodX'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModX_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_wiremodX_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'LYRayleigh'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYRayleigh_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYRayleigh_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'LYAtt'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYAttenuation_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYRayleigh_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc)

s_data = 'LYdown'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYDown_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYdown_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc)



s_data = 'SCE'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_SCE_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_SCE_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run3_df,df_mc_cvweight_500)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print(df_bnb_wCV_wPass_wProc.shape[0])

print('done %s'%s_data)




s_data = 'recomb'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3_detvar_Recomb2_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_scaleddedx_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run3_df,df_mc_cvweight_500)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

print('----done all----')

done cv500
done cv1000
done wiremodThetaXZ
21269
done wiremodThetaYZ
21624
done wiremodYZ
22479
done wiremodX
21681
done LYRayleigh
23510
done LYAtt
       Proton_CosTheta  Lepton_CosTheta  MPID_pion  MPID_gamma  MPID_proton  \
0             0.830783        -0.034722   0.159228    0.014726     0.998197   
1             0.258883        -0.171274   0.678799    0.030676     0.839291   
2             0.830783        -0.034722   0.159229    0.014726     0.998197   
3             0.258883        -0.171274   0.678799    0.030676     0.839292   
4             0.975623        -0.270942   0.208159    0.017922     0.941928   
...                ...              ...        ...         ...          ...   
26322         0.747969        -0.920739   0.721294    0.239540     0.342381   
26323         0.916610        -0.067103   0.802907    0.109758     0.946799   
26325         0.928967        -0.014260   0.717957    0.737711     0.946799   
26326         0.704559        -0.922697   0.721293    0.23954

# Run1

In [14]:
run = 'run1'
df_mc_cvweight = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_DetVar_run1.root'%auxfolder)
df_cv_500 = read_root('%s/mcc9_bnb_nu_overlay_CV_run1_500k_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_CV_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_cv = df_cv.join(df_passPi0.set_index(RSEV),on=RSEV)
print('done cv500')


s_data = 'LYRayleigh'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_LYRayleigh_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYRayleigh_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'LYAtt'
df_bnb = read_root('%s/mcc9_bnb_nu_overlay_DetVar_LYAttenuation_run1_500k_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYRayleigh_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'LYdown'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_LYDown_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_LYdown_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])



s_data = 'SCE'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_SCE_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_SCE_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print(df_bnb_wCV_wPass_wProc.shape[0])

print('done %s'%s_data)




s_data = 'recomb'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_Recomb2_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
#df_passPi0 = pd.read_table('%s/Pi0Sel/July27_scaleddedx_numu.txt',sep=',')
#df_passPi0['PassFinalSelectionPi0'] = 1
#df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

print('----done all----')

done cv500
done LYRayleigh
17704
done LYAtt
15597
done LYdown
16014
11025
done SCE
done recomb
12281
----done all----


In [36]:
#Detsys for nues now!
df_cv = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_CV.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_CV.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_cv_wPass = df_cv.merge(df_passPi0,how='left')
print('donecv')

s_data = 'wiremodX'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodX.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModX.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)


s_data = 'LYdownRayleigh'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodLYRayleigh.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_LYdownRayleigh.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)


s_data = 'LYdown'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodLYdown.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_LYdown.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'SCE'
df_bnb = read_root('../../data/detsys/mcc9_v29e_dl_run3b_bnb_intrinsic_nue_wiremodSCE.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_SCE.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremoddEdx'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodScaleddEdX.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModdEdx.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodThetaXZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodThetaXZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModThetaXZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodThetaYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodThetaYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModThetaYZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModYZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

donecv
done wiremodX
done LYdownRayleigh
done LYdown
done SCE
done wiremoddEdx
done wiremodThetaXZ
done wiremodThetaYZ
done wiremodYZ


In [70]:
s_data = 'LYdown'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_LYdown.root','dlana/FinalVertexVariables')

In [71]:
df_bnb['MC_energyInit']

0       -99998.0
1       -99998.0
2       -99998.0
3       -99998.0
4       -99998.0
          ...   
30387   -99998.0
30388   -99998.0
30389   -99998.0
30390   -99998.0
30391   -99998.0
Name: MC_energyInit, Length: 30392, dtype: float32

# DIRT

OSError: cannot read ../data/dirt/FVV-Prime-dirt-Mar3-WC-1M1P.root

Error in <TFile::TFile>: file ../data/dirt/FVV-Prime-dirt-Mar3-WC-1M1P.root does not exist
