In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
import xgboost as xgb
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats

datafolder = '../../data'
dumpfolder = '../PlotDumps'
RSE  = ['run','subrun','event']
RSEV = ['run','subrun','event','vtxid']

Welcome to JupyROOT 6.18/04


This notebook takes all of the cumbersome input files necessary for this analysis and creates neat, portable parquet files containing the dataframes which can be easily used throughout the rest of the study.

Load BDT weights from pickles. These BDTs are already applied in the DLAna processing stage, but I have this implemented for testing.

In [2]:
with open('/home/dcianci/Physics/1e1p/testzone/1L1PSelection/bdt_models/bdtweight_series2_june1_run1.pickle','rb') as handle: bkgBDT_run1 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation     
with open('/home/dcianci/Physics/1e1p/testzone/1L1PSelection/bdt_models/bdtweight_series2_june1_run3.pickle','rb') as handle: bkgBDT_run3 = pickle.load(handle)          # Load BDT weights for 1mu1p background differentiation  
    
# the variables used as BDT features
vars_june1 = ['Phis','ChargeNearTrunk','Enu_1m1p','PhiT_1m1p','AlphaT_1m1p','PT_1m1p','PTRat_1m1p','BjXB_1m1p','BjYB_1m1p','SphB_1m1p','Q0_1m1p','Q3_1m1p','Lepton_PhiReco','Lepton_TrackLength','Proton_PhiReco','Proton_ThetaReco']
bdtVars = vars_june1

In [3]:
#xgb.dump(bkgBDT_run1,'bkgBDT_run1')
type(bkgBDT_run1)
bkgBDT_run1.save_model('bkgBDT_run1.model')
bkgBDT_run3.save_model('bkgBDT_run3.model')


My homebrew MC labeling scheme for easy sorting and plotting later on: In bless_int_labels, elements of a dataframe are assigned labels based on underlying neutrino interaction; in bless_MC_labels, we do the same thing, but add additional reconstruction qualifiers as described in the DL internal note.

In [4]:
def bless_MC_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if not (row['MC_nproton']==1 and row['MC_nlepton']==1):
        return 'nLmP'
    elif not 0 < row['MC_scedr'] <= 5.0:
        return 'offvtx'
    elif not abs((row['MC_energyInit']-row['Enu_1m1p'])/row['MC_energyInit']) < 0.2:
        return 'badreco'    
    else:
        if row['nu_interaction_type'] == 1001:
            mclabel = 'CCQE'
        elif row['nu_interaction_type'] == 1000:
            mclabel = 'MEC'
        elif row['nu_interaction_type'] in pizero:
            mclabel = 'pizero'
        elif row['nu_interaction_type'] in piplusminus:
            mclabel = 'piplusminus' 
        else:
            mclabel = 'other'
    return '%s_%s'%(intlabel,mclabel)

def bless_int_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if row['nu_interaction_type'] == 1001:
        mclabel = 'CCQE'
    elif row['nu_interaction_type'] == 1000:
        mclabel = 'MEC'
    elif row['nu_interaction_type'] in pizero:
        mclabel = 'pizero'
    elif row['nu_interaction_type'] in piplusminus:
        mclabel = 'piplusminus' 
    else:
        mclabel = 'other'
           
    return '%s_%s'%(intlabel,mclabel)

Add a tag for the parquets to be exported here and now. Some space is provided describing previous tags.

In [5]:
tag = 'Feb16_ForOsc'

# Tag dictionary:
# Oct21_cmt - all up to date as of the collaboration meeting in october. Identical selection to internal note, now with training samples marked
# RunCompatibility - tune 1 and special BDT training for tests
# Nov17_tune2 - Same as Oct21_cmt, but now all runs have genie tune 2 weighting
# Feb10_pi0wgt - Same as Nov17_tune2, but with pi0 weights

# All Precuts
orthogonalcut = 'MaxShrFrac < .2'
precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5'
pmtprecuts = 'TotPE > 20 and PorchTotPE < 20'
s_precut = orthogonalcut + ' and ' + precuts + ' and ' + pmtprecuts

Functions to "process" the MC. Given the MC dataframe, the weights dataframe, the good runs dataframe and the BDT weights to be used, we combine everything neatly.

In [6]:
def proc_df(df_dlana,df_wgts,df_goodrun,bdtwgts,isMC=True):
    
    # join with goodruns list and cv weights list
    df_full = df_dlana.join(df_goodrun.set_index('run'),on='run')
    if(isMC):
        df_full = df_full.join(df_wgts.set_index(RSE)[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg','nu_energy_true','nu_L_true']],on=RSE)
    df_full_goodruns = df_full.query('good==1')   # keep good runs
    df_full_goodruns_precuts = df_full_goodruns.query(s_precut)    # apply precuts
       
    if(isMC):
        df_full_goodruns_precuts.insert(0,'mc_label',df_full_goodruns_precuts.apply(bless_MC_labels,axis=1))
        df_full_goodruns_precuts.insert(0,'int_label',df_full_goodruns_precuts.apply(bless_int_labels,axis=1))
       
    # add a bunch of helpful variables!
    df_full_goodruns_precuts.insert(0,'MPID_eminus',[ef.max() for ef in df_full_goodruns_precuts['EminusPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_muon',[ef.max() for ef in df_full_goodruns_precuts['MuonPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_proton',[ef.max() for ef in df_full_goodruns_precuts['ProtonPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_gamma',[ef.max() for ef in df_full_goodruns_precuts['GammaPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'MPID_pion',[ef.max() for ef in df_full_goodruns_precuts['PionPID_int_v'].values])
    df_full_goodruns_precuts.insert(0,'Lepton_CosTheta',np.cos(df_full_goodruns_precuts['Lepton_ThetaReco'].values).tolist())
    df_full_goodruns_precuts.insert(0,'Proton_CosTheta',np.cos(df_full_goodruns_precuts['Proton_ThetaReco'].values).tolist())
    
    # add most current bdt weights.
    # bdt weights are applied as  bkgBDT_univ[ersal] so whether we are dealing with Run 1, 2 or 3, we are calling the correct weight, rather than one for a particular Run.
    df_full_goodruns_precuts.insert(0,'bkgBDT_univ',bdtwgts.predict_proba(df_full_goodruns_precuts[bdtVars].values.tolist())[:,0])
    # for each event, keep only vertex with best (most signal-like) BDT score
    df_full_nodupes = df_full_goodruns_precuts.sort_values('bkgBDT_univ',ascending=True).drop_duplicates(RSE).sort_index()
    return df_full_nodupes
    
def proc_df_fakedata(df_dlana,bdtwgts):
    
    df_full_precuts = df_dlana.query(s_precut)    # apply precuts
    
    # add a bunch of helpful variables!
    df_full_precuts.insert(0,'MPID_eminus',[ef.max() for ef in df_full_precuts['EminusPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_muon',[ef.max() for ef in df_full_precuts['MuonPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_proton',[ef.max() for ef in df_full_precuts['ProtonPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_gamma',[ef.max() for ef in df_full_precuts['GammaPID_int_v'].values])
    df_full_precuts.insert(0,'MPID_pion',[ef.max() for ef in df_full_precuts['PionPID_int_v'].values])
    df_full_precuts.insert(0,'Lepton_CosTheta',np.cos(df_full_precuts['Lepton_ThetaReco'].values).tolist())
    df_full_precuts.insert(0,'Proton_CosTheta',np.cos(df_full_precuts['Proton_ThetaReco'].values).tolist())
    
    # add most current bdt weights.
    df_full_precuts.insert(0,'bkgBDT_univ',bdtwgts.predict_proba(df_full_precuts[bdtVars].values.tolist())[:,0])
    df_full_nodupes = df_full_precuts.sort_values('bkgBDT_univ',ascending=True).drop_duplicates(RSE).sort_index()
    
    return df_full_nodupes

Load the good runs lists into simple dataframes

In [7]:
good_run1_df = pd.read_csv('%s/goodruns_2020.txt'%datafolder)
good_run2_df = pd.read_csv('%s/goodruns_2020_run2.txt'%datafolder)
good_run3_df = pd.read_csv('%s/goodruns_2020_run3.txt'%datafolder)

good_run1_df['good'] = 1
good_run2_df['good'] = 1
good_run3_df['good'] = 1

Create a dictionary to label whether or not a given event is in the BDT training sample

In [8]:
# Are we in  the training sample?
a_train_run1 = np.genfromtxt('bdt_run1_trainsample.csv',delimiter=',')
a_train_run3 = np.genfromtxt('bdt_run3_trainsample.csv',delimiter=',')
dic_train = {}

for tr in a_train_run1:
    idx = tuple((tr[0],tr[1]))
    dic_train[idx] = dict(intrain=1)

for tr in a_train_run3:
    idx = tuple((tr[0],tr[1]))
    dic_train[idx] = dict(intrain=1)
    
def InTrainRun(row):
    try:
        return dic_train[row.run,row.subrun]['intrain']
    except:
        return 0

In [9]:
# Beam quality
beamq_df = read_root('%s/beamdataquality_remix_bnb5e19.root'%datafolder,'bdq')

# MC BNB OVERLAY

##  Run1

In [21]:
s_mc = 'numu_run1'
df_mc = read_root('%s/bnb_overlay/mcc9_v28_wctagger_bnboverlay_stripped.root'%datafolder,'FinalVertexVariables')
df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_run1.root'%datafolder) #tune 2
#df_mc_cvweight = read_root('%s/bnb_overlay/arxiv/weights_forCV_v40_bnb_nu_run1.root'%datafolder) # tune 1

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_numu_run1_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True) 
df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes (to avoid overcounting with nue intrinsics)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [37]:
s_mc = 'numu_run1'
df_mc = read_root('%s/bnb_overlay/mcc9_v28_wctagger_bnboverlay_stripped.root'%datafolder,'FinalVertexVariables')

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_numu_run1_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)


In [44]:
print(np.nan_to_num(df_mc['pi0 weight'].values).max())

1.3091200927085052


In [11]:
s_mc = 'numu_lowe_run1'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_run1_bnb_nu_overlay_LowE.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_lowE_run1.root'%datafolder)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)
df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

##  Run 2
Run 2 had some small indexing errors that I needed to resolve manually (ie: multiple events had the same RSE  and therefore needed new indices. Very annoying.)

In [22]:
s_mc = 'numu_run2'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_dl_run2_bnb_nu_overlay_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_run2.root'%datafolder)

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_numu_run2_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

In [23]:
def bless_cvindex(row):
    df_subsec = df_mc_cvweight.query('run==%s and subrun==%s and event==%s'%(row['run'],row['subrun'],row['event']))   
    if(len(df_subsec) == 0):
        return np.nan
    if(len(df_subsec) == 1):
        return df_subsec.index[0]
    idx = np.argmin(np.abs(df_subsec['nu_energy_true'].values-row['MC_energyInit']))
    return df_subsec.index[idx]

In [24]:
df_full = df_mc.join(good_run2_df.set_index('run'),on='run')
df_full_goodruns = df_full.query('good==1')   # keep good runs
df_full_goodruns_precuts = df_full_goodruns.query(s_precut)    # apply precuts
    
df_full_goodruns_precuts.insert(0,'CVIndex',df_full_goodruns_precuts.apply(bless_cvindex,axis=1)) # this takes forever

In [25]:
# add a bunch of helpful variables!
df_full_goodruns_precuts.insert(0,'MPID_eminus',[ef.max() for ef in df_full_goodruns_precuts['EminusPID_int_v'].values])
df_full_goodruns_precuts.insert(0,'MPID_muon',[ef.max() for ef in df_full_goodruns_precuts['MuonPID_int_v'].values])
df_full_goodruns_precuts.insert(0,'MPID_proton',[ef.max() for ef in df_full_goodruns_precuts['ProtonPID_int_v'].values])
df_full_goodruns_precuts.insert(0,'MPID_gamma',[ef.max() for ef in df_full_goodruns_precuts['GammaPID_int_v'].values])
df_full_goodruns_precuts.insert(0,'MPID_pion',[ef.max() for ef in df_full_goodruns_precuts['PionPID_int_v'].values])
df_full_goodruns_precuts.insert(0,'Lepton_CosTheta',np.cos(df_full_goodruns_precuts['Lepton_ThetaReco'].values).tolist())
df_full_goodruns_precuts.insert(0,'Proton_CosTheta',np.cos(df_full_goodruns_precuts['Proton_ThetaReco'].values).tolist())
    
# add most current bdt weights.
df_full_goodruns_precuts.insert(0,'bkgBDT_univ',bkgBDT_run3.predict_proba(df_full_goodruns_precuts[bdtVars].values.tolist())[:,0])
df_full_nodupes = df_full_goodruns_precuts.sort_values('bkgBDT_univ',ascending=True).drop_duplicates('CVIndex').sort_index()

In [26]:
df_full_cved = df_full_nodupes.join(df_mc_cvweight[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg','nu_energy_true','nu_L_true']],on='CVIndex')
df_full_cved.insert(0,'mc_label',df_full_cved.apply(bless_MC_labels,axis=1))
df_full_cved.insert(0,'int_label',df_full_cved.apply(bless_int_labels,axis=1))
     
df_mc_final = df_full_cved.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes
df_mc_final.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

## Run 3

In [10]:
s_mc = 'numu_run3'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_stripped.root'%datafolder,'FinalVertexVariables')
#df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_run3.root'%datafolder) # tune 2
df_mc_cvweight = read_root('%s/bnb_overlay/arxiv/weights_forCV_v40_bnb_nu_run3.root'%datafolder) #tune 1

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_numu_run3_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,bkgBDT_run3,True) 
df_mc_full.to_parquet('mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_stripped_wBDT.parquet')

df_mc_full = df_mc_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

#df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

#del df_mc, df_mc_cvweight, df_mc_full

In [10]:
s_mc = 'numu_run3'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_stripped.root'%datafolder,'FinalVertexVariables')
#df_mc_cvweight = read_root('%s/bnb_overlay/weights_forCV_v48_Sep24_bnb_nu_run3.root'%datafolder) # tune 2
df_mc_cvweight = read_root('%s/bnb_overlay/arxiv/weights_forCV_v40_bnb_nu_run3.root'%datafolder) #tune 1

In [14]:
df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_numu_run3_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)


In [16]:
df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

In [18]:
df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,bkgBDT_run3,True) 

In [22]:
df_mc_full.query('run==16949 and subrun==252 and event==12625')['nu_pdg']

1405    12
Name: nu_pdg, dtype: int32

# MC NUE OVERLAY

In [28]:
s_mc = 'nue_run1'
df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v28_wctagger_nueintrinsics_stripped.root'%datafolder,'FinalVertexVariables')
#df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v48_Sep24_intrinsic_nue_run1.root'%datafolder) #tune 2
df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/arxiv/weights_forCV_v40_intrinsic_nue_run1.root'%datafolder) # tune 1

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_nue_run1_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [29]:
s_mc = 'nue_run2'
df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v29e_dl_run2_bnb_intrinsics_nue_overlay_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v48_Sep24_intrinsic_nue_run2.root'%datafolder)

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_nue_run2_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run2_df,bkgBDT_run3,True)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [30]:
s_mc = 'nue_run3'
df_mc = read_root('%s/nue_intrinsic_overlay/mcc9_v29e_run3b_bnb_intrinsic_nue_overlay_nocrtremerge_stripped.root'%datafolder,'FinalVertexVariables')
#df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/weights_forCV_v48_Sep24_intrinsic_nue_run3.root'%datafolder)  # tune 2
df_mc_cvweight = read_root('%s/nue_intrinsic_overlay/arxiv/weights_forCV_v40_intrinsic_nue_run3.root'%datafolder) # tune 1

df_mc_pi0wgt = pd.read_csv('%s/pi0weight/Dec14_pi0_weights_nue_run3_forDavio.txt'%datafolder)
df_mc = df_mc.join(df_mc_pi0wgt.set_index(RSE),on=RSE)

df_mc.insert(0,'InTraining',df_mc.apply(InTrainRun,axis=1))

df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run3_df,bkgBDT_run3,True)

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

In [31]:
df_mc_pi0wgt

Unnamed: 0,run,subrun,event,pi0weight
0,17753,166,8308,0.963621
1,14877,177,8874,0.836396
2,16446,183,9179,0.687725
3,15492,90,4549,0.698774
4,15785,162,8120,1.0
5,17662,97,4855,1.0
6,14455,142,7108,1.0
7,16159,8,430,0.729623
8,15106,190,9537,1.0
9,15935,146,7332,1.0


# Dirt

In [15]:
# s_mc = 'dirt_run1'
# df_mc = read_root('%s/dirt/FVV-Prime-dirt-Mar3-WC-1M1P.root'%datafolder,'FinalVertexVariables')
# df_mc_cvweight = read_root('%s/dirt/weights_forCV_v40_dirt_nu_run1.root'%datafolder)

# df_mc_full = proc_df(df_mc,df_mc_cvweight,good_run1_df,bkgBDT_run1,True)

# df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

# del df_mc, df_mc_cvweight, df_mc_full

# EXT

In [29]:
s_ext = 'ext_run1'
df_ext = read_root('%s/ext/mcc9_v28_wctagger_extbnbFULL_stripped.root'%datafolder)

df_ext.insert(0,'InTraining',df_ext.apply(InTrainRun,axis=1))

df_ext_full = proc_df(df_ext,'',good_run1_df,bkgBDT_run1,False)

df_ext_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_ext,tag))

In [30]:
s_ext = 'ext_run3'
df_ext = read_root('%s/ext/mcc9_v29e_dl_run3_G1_extbnb_stripped.root'%datafolder)
    
df_ext.insert(0,'InTraining',df_ext.apply(InTrainRun,axis=1))

df_ext_full = proc_df(df_ext,'',good_run3_df,bkgBDT_run3,False)

df_ext_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_ext,tag)) 

# Data

In [12]:
s_data = 'data_run1_5e19_beamqualitytag'
df_data = read_root('%s/bnb/mcc9_v28_wctagger_5e19.root'%datafolder,'dlana/FinalVertexVariables')

# add beam quality filter
df_data = df_data.join(beamq_df.set_index(['run','subrun','event']),on=['run','subrun','event'])
df_data_full = proc_df(df_data,'',good_run1_df,bkgBDT_run1,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [None]:
s_data = 'data_run3_1e19'
df_data = read_root('%s/bnb/mcc9_v28_wctagger_run3_bnb1e19.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run3_df,bkgBDT_run3,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [None]:
s_data = 'data_run2_filter'
df_data = read_root('%s/bnb/mcc9_v29e_run2_D2E1_1m1p_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run2_df,bkgBDT_run2,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [24]:
s_data = 'data_run3_filter'
df_data = read_root('%s/bnb/mcc9_v29e_run3_F1G1_1m1p_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run3_df,bkgBDT_run3,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [25]:
s_data = 'data_run1_filter'
df_data = read_root('%s/bnb/mcc9_v29e_dl_run1_C1_bnb_dlfilter_1m1p_v1_1_2b_fvv.root'%datafolder,'dlana/FinalVertexVariables')

df_data_full = proc_df(df_data,'',good_run1_df,bkgBDT_run1,False)

df_data_full['InTraining'] = 0

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [10]:
s_data = 'fakedata_set1_run1'
df_data = read_root('%s/fakedata/dlfilter_fakedata_v08_00_00_29e_dl_ubdlana_v1_1_2_set1_run1_1m1p_stripped.root'%datafolder,'FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [11]:
s_data = 'fakedata_set1_run3b'
df_data = read_root('%s/fakedata/dlfilter_fakedata_v08_00_00_29e_dl_ubdlana_v1_1_2_set1_run3b_1m1p_stripped.root'%datafolder,'FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [13]:
s_data = 'fakedata_set3_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set3_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [14]:
s_data = 'fakedata_set3_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set3_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [7]:
s_data = 'fakedata_set2_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set2_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [8]:
s_data = 'fakedata_set2_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set2_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [10]:
s_data = 'fakedata_set4_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set4_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [11]:
s_data = 'fakedata_set4_run3b'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set4_fakedata_run3b_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run3)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

In [12]:
s_data = 'fakedata_set5_run1'
df_data = read_root('%s/fakedata/mcc9_v29e_dl_set5_fakedata_run1_numu_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')

# remember to turn off pmt precuts
df_data_full = proc_df_fakedata(df_data,bkgBDT_run1)

df_data_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))

# Detector Systematics (run3; not updated)

In [6]:
s_mc = 'numu_run3'
df_mc = read_root('%s/bnb_overlay/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_stripped.root'%datafolder,'FinalVertexVariables')
df_mc_cvweight = read_root('%mc/bnb_overlay/weights_forCV_v40_bnb_nu_run3.root'%datafolder)

df_mc_full = proc_df_mc(df_mc,df_mc_cvweight,good_run3_df)  
df_mc_full = df_bnb_full.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)') # cut out nue ccqes

df_mc_full.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_mc,tag))

del df_mc, df_mc_cvweight, df_mc_full

ValueError: unsupported format character 'm' (0x6d) at index 1

In [30]:
def proc_detvar_mc(df_dlana,df_cv,df_goodrun):
    
    df_cv['isCV'] = True
    df_dlana['isCV'] = False
    # Join cv and detsys together
    # ugh. okay. this phase takes a million years so let's do it smarter
    
    df3_big = pd.concat((df_cv,df_dlana))
    overlap = df_cv.merge(df_dlana[RSE],how='inner')[RSE].drop_duplicates()
    df_full = df3_big.merge(overlap,how='inner')
     
    #df_full = pd.concat((df_dlana[df_dlana[RSE].isin(df_cv[RSE])],df_cv[df_cv[RSE].isin(df_dlana[RSE])])) 
    df_full_wGoodruns = df_full.join(df_goodrun.set_index('run'),on='run')
    df_full_goodruns = df_full_wGoodruns.query('good == 1 and Enu_1m1p > 0')
    df_full_goodruns.insert(0,'PassPrecuts1m1p',df_full_goodruns.apply(passPrecut,axis=1))     
    return df_full_goodruns

def passPrecut(row):
    #orthogonalcut = 'MaxShrFrac < .2'
    #precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5 and '
    #s_precut = orthogonalcut + ' and ' + precuts
    return (row['PassSimpleCuts'] == 1 and row['ChargeNearTrunk'] > 0 and row['FailedBoost'] != 1 and row['OpenAng'] > .5 and row['MaxShrFrac'] < .2)

In [None]:
def proc_detvar_mc(df_dlana,df_cv,df_goodrun):
    df_cv['isCV'] = True
    df_dlana['isCV'] = False
    
    # Join cv and detsys together
    df3_big = pd.concat((df_cv,df_dlana))
    overlap = df_cv.merge(df_dlana[RSE],how='inner')[RSE].drop_duplicates()
    df_full = df3_big.merge(overlap,how='inner')
    
    # add goodruns and 1m1p precut tag
    df_full_wGoodruns = df_full.join(df_goodrun.set_index('run'),on='run')
    df_full_goodruns = df_full_wGoodruns.query('good == 1 and Enu_1m1p > 0')
    df_full_goodruns.insert(0,'PassPrecuts1m1p',df_full_goodruns.apply(passPrecut,axis=1))     
    return df_full_goodruns#goodruns_precuts

def passPrecut(row):
    #orthogonalcut = 'MaxShrFrac < .2'
    #precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5 and '
    #s_precut = orthogonalcut + ' and ' + precuts
    return (row['PassSimpleCuts'] == 1 and row['ChargeNearTrunk'] > 0 and row['FailedBoost'] != 1 and row['OpenAng'] > .5 and row['MaxShrFrac'] < .2)

In [35]:
df_cv = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_overlay_CV.root','FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_CV_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_cv_wPass = df_cv.join(df_passPi0.set_index(RSEV),on=RSEV)
print('done cv')

s_data = 'wiremodThetaXZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_wiremodThetaXZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_wiremodthetaXZ_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodThetaYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_wiremodThetaYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_wiremodthetaYZ_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'LYdownRayleigh'
df_bnb = read_root('../../data/detsys/mcc9_v40_dl_run3b_bnb_nu_overlay_DetVar_LYRayleigh.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_LYRayleigh_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'SCE'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_SCE.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_SCE_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'LYdown'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_LYdown.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_LYdown_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_nu_overlay_DetVar_wiremodYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_wiremodYZ_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodX'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_overlay_wiremodX.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_wiremodX_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremoddEdx'
df_bnb = read_root('../../data/detsys/mcc9_v40_dl_run3b_bnb_nu_overlay_DetVar_wiremodscaleddedx.root','FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/Pi0Sel/July27_scaleddedx_numu.txt',sep=',')
df_passPi0['PassFinalSelectionPi0'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

done cv
done wiremodThetaXZ
done wiremodThetaYZ


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


done LYdownRayleigh
done SCE
done LYdown
done wiremodYZ
done wiremodX
done wiremoddEdx


In [36]:
#Detsys for nues now!
df_cv = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_CV.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_CV.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_cv_wPass = df_cv.merge(df_passPi0,how='left')
print('donecv')

s_data = 'wiremodX'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodX.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModX.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)


s_data = 'LYdownRayleigh'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodLYRayleigh.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_LYdownRayleigh.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)


s_data = 'LYdown'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodLYdown.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_LYdown.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'SCE'
df_bnb = read_root('../../data/detsys/mcc9_v29e_dl_run3b_bnb_intrinsic_nue_wiremodSCE.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_SCE.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremoddEdx'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodScaleddEdX.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModdEdx.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodThetaXZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodThetaXZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModThetaXZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodThetaYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodThetaYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModThetaYZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

s_data = 'wiremodYZ'
df_bnb = read_root('../../data/detsys/mcc9_v40a_dl_run3b_bnb_intrinsic_nue_overlay_wiremodYZ.root','dlana/FinalVertexVariables')
df_passPi0 = pd.read_table('../../data/detsys/SystematicsForDavio/SystematicsForDavio_WireModYZ.txt',sep=',',names=['run','subrun','event','vtxid'])
df_passPi0['PassFinalSelection1e1p'] = 1
df_bnb_wPass = df_bnb.join(df_passPi0.set_index(RSEV),on=RSEV)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb_wPass,df_cv_wPass,good_run3_df)
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/nue_%s_%s.parquet'%(datafolder,s_data,tag))
print('done %s'%s_data)

donecv
done wiremodX
done LYdownRayleigh
done LYdown
done SCE
done wiremoddEdx
done wiremodThetaXZ
done wiremodThetaYZ
done wiremodYZ


# POT Counting
some old pot counting tools kept for posterity (and because I always forget how to use larlite

In [2]:
import os,sys
import ROOT as rt
from array import array

potsum = 0.0
potsumtraining = 0.0
sanitytest = 0.0

rfile   = rt.TFile('../../data/potsummaries/potsum_mcc9_run3_bnb_overlay.root')
pottree = rfile.Get("potsummary_generator_tree")
nentries = pottree.GetEntries() 
for ientry in range(nentries):
    pottree.GetEntry(ientry)
    idx = tuple((pottree.potsummary_generator_branch.run(),pottree.potsummary_generator_branch.subrun()))
    potsum += pottree.potsummary_generator_branch.totgoodpot
    if idx in dic_train_run3:
        potsumtraining += pottree.potsummary_generator_branch.totgoodpot

print(nentries)
print("POT SUM: ",potsum)
print("POT SUM IN TRAINING: ",potsumtraining)
print("CORRECTEDPOTSUM:",potsum-potsumtraining)
print('TrainingFrac = ',(potsum-potsumtraining)/float(potsum))


NameError: name 'dic_train_run3' is not defined

In [3]:
import ROOT as rt
from larlite import larlite
io = larlite.storage_manager( larlite.storage_manager.kREAD )
io.add_in_filename('../../data/potsummaries/potsum_mcc9_run1_bnb_overlay.root')
io.open()
ll_nentries  = io.get_entries()
for ientry in range(ll_nentries):
    io.go_to(ientry)
    run = io.run_id()
    subrun = io.subrun_id()
    event = io.event_id()
    print(run,subrun,event)
io.close()
print(ll_nentries)

0
[95m    [NORMAL]  [0m[95m<open> [0mOpening a file in READ mode: ../../data/potsummaries/potsum_mcc9_run1_bnb_overlay.root
[91m     [ERROR]  [0m[95m<prepare_tree> [0mDid not find any relevant data tree!
[91m     [ERROR]  [0m[95m<close> [0mAttempt to close file while not operating I/O!
