In [1]:
%matplotlib inline

from ROOT import TFile,vector,TGraph
import ROOT
import matplotlib.pyplot as plt
import pickle
import pandas as pd

import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats

Welcome to JupyROOT 6.14/08


In [2]:
def bless_tune1_reweight(row):
    """Return the tune-1 / MCC9 cross-section ratio used as a per-event weight.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'nu_interaction_mode'; 'nu_pdg' and 'MC_energyInit' are
        only read when the interaction mode equals 1001.

    Returns
    -------
    float
        Ratio of the tune-1 graph to the MCC9 graph evaluated at
        row['MC_energyInit'] for |nu_pdg| of 14 (numu graphs) or 12 (nue
        graphs) when the interaction mode is 1001; 1.0 for every other event.

    Relies on the module-level graphs xsec_tune1_graph_* / xsec_mcc9_graph_*.

    NOTE(review): bless_MC_labels compares 'nu_interaction_type' with 1001,
    whereas this function compares 'nu_interaction_mode' -- confirm that the
    mode column really carries the 1001 code, otherwise no event is reweighted.
    """
    unit_weight = float(1)

    # Everything that is not flagged 1001 keeps unit weight.
    if not (row['nu_interaction_mode'] == 1001):
        return unit_weight

    flavour = row['nu_pdg']
    if flavour in (14, -14):
        tuned, nominal = xsec_tune1_graph_numu, xsec_mcc9_graph_numu
    elif flavour in (12, -12):
        tuned, nominal = xsec_tune1_graph_nue, xsec_mcc9_graph_nue
    else:
        # Any other neutrino flavour is left untouched.
        return unit_weight

    return tuned.Eval(row['MC_energyInit']) / nominal.Eval(row['MC_energyInit'])

def bless_MC_labels(row):
    """Build the '<flavour>_<category>' truth label for one MC event.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'nu_pdg' and 'MC_scedr'; 'nu_interaction_type' is only
        read when MC_scedr lies in (0, 5.0].

    Returns
    -------
    str
        Flavour part: 'nue' for |nu_pdg| == 12, 'numu' for |nu_pdg| == 14,
        empty otherwise.  Category part: 'offvtx' when MC_scedr is outside
        (0, 5.0]; otherwise 'CCQE' (type 1001), 'MEC' (type 1000), 'pizero',
        'piplusminus' or 'other' according to nu_interaction_type.
    """
    # Interaction-type codes grouped by the pion category they are labelled with.
    pizero_codes = (1090, 1086, 1080, 1015, 1013, 1011, 1008, 1006, 1004)
    charged_pion_codes = (1085, 1079, 1032, 1017, 1014, 1007, 1005, 1003,
                          1028, 1021, 1016, 1012, 1010, 1009)

    flavour = {12: 'nue', 14: 'numu'}.get(abs(row['nu_pdg']), '')

    if 0 < row['MC_scedr'] <= 5.0:
        interaction = row['nu_interaction_type']
        category = 'other'
        # First matching group wins, in the same priority order as before.
        for name, codes in (('CCQE', (1001,)),
                            ('MEC', (1000,)),
                            ('pizero', pizero_codes),
                            ('piplusminus', charged_pion_codes)):
            if interaction in codes:
                category = name
                break
    else:
        # Zero, negative, NaN or > 5.0 all count as off-vertex.
        category = 'offvtx'

    return '%s_%s'%(flavour,category)

def bless_leeweight(row):
    """Look up the binned weight for row['MC_energyInit'].

    The energy is compared against the upper edge of each bin in turn and the
    weight of the first bin whose upper edge exceeds it is returned (so any
    value below 0.200, including negative ones, gets the first weight, 0).
    Energies at or above the last edge (3.000), and NaN, give 0.

    NOTE(review): the bin edges span 0-3.0; confirm that MC_energyInit is in
    the same units as these edges.
    """
    bin_weights = np.array((0, 5.03093, 4.50515, 3.50515, 2.31959, 1.31959, 0.64948, 0.27835, 0.11340, 0))
    bin_edges = np.array((0,.200,.250,.300,.350,.400,.450,.500,.600,.800,3.000))

    energy = row['MC_energyInit']
    # Pair every weight with the upper edge of its bin and take the first hit.
    return next(
        (weight for upper_edge, weight in zip(bin_edges[1:], bin_weights) if energy < upper_edge),
        0,
    )

In [3]:
# Column names used together as the join key between per-event tables
# (see the .join(..., on=RSE) calls in the sample cells below).
RSE=['run','subrun','event']

In [4]:
# Cross-section graphs read by bless_tune1_reweight, which returns
# xsec_tune1_graph_*.Eval(E) / xsec_mcc9_graph_*.Eval(E).
# Each variable now opens the file whose name matches it: the two paths were
# crossed before (the mcc9 variables read the tune1 file and vice versa),
# which inverted the ratio.
# NOTE(review): this relies on the file names describing their contents -- confirm.
xsec_mcc9_file = ROOT.TFile('../data/xsec_graphs_mcc9_v304.root')
xsec_mcc9_graph_numu = xsec_mcc9_file.Get('nu_mu_Ar40/qel_cc_n')
xsec_mcc9_graph_nue = xsec_mcc9_file.Get('nu_e_Ar40/qel_cc_n')
xsec_tune1_file = ROOT.TFile('../data/xsec_graphs_tune1.root')
xsec_tune1_graph_numu = xsec_tune1_file.Get('nu_mu_Ar40/qel_cc_n')
xsec_tune1_graph_nue = xsec_tune1_file.Get('nu_e_Ar40/qel_cc_n')

In [5]:
# Beam quality
# Reads the 'bdq' tree into a DataFrame. The run-1 open-data cell further down
# re-reads the same file before joining it on run/subrun/event.
beamq_df = read_root('../data/beamdataquality_remix_bnb5e19.root','bdq')

In [6]:
# test file contents
# Scratch check: load the FinalVertexVariables tree of a test file and print
# the whole '_pi0mass' column. The saved output shows many -9.9999e+04 and 0
# entries -- presumably default/sentinel values; TODO confirm.
df_bnb_test = read_root('/media/disk1/kmason/testshower.root','dlana/FinalVertexVariables')
print(df_bnb_test['_pi0mass'].values)
                        

[-9.9999000e+04 -9.9999000e+04  0.0000000e+00 -9.9999000e+04
 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04  0.0000000e+00
  0.0000000e+00 -9.9999000e+04  0.0000000e+00 -9.9999000e+04
 -9.9999000e+04  0.0000000e+00 -9.9999000e+04 -9.9999000e+04
 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04
  5.6805847e+02  7.3256462e+01 -9.9999000e+04 -9.9999000e+04
  0.0000000e+00  0.0000000e+00 -9.9999000e+04  0.0000000e+00
 -9.9999000e+04 -9.9999000e+04  0.0000000e+00 -9.9999000e+04
 -9.9999000e+04  0.0000000e+00 -9.9999000e+04 -9.9999000e+04
 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04
 -9.9999000e+04 -9.9999000e+04  0.0000000e+00 -9.9999000e+04
 -9.9999000e+04  0.0000000e+00  0.0000000e+00 -9.9999000e+04
 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04 -9.9999000e+04
  0.0000000e+00 -9.9999000e+04  0.0000000e+00  0.0000000e+00
  0.0000000e+00 -9.9999000e+04 -9.9999000e+04  5.1912512e+02
  0.0000000e+00  6.7604156e+02  7.3346124e+02 -9.9999000e+04
 -9.9999000e+04 -9.99990

# Run 1 full osc

In [6]:
# Run-1 full-osc MC: load the FinalVertexVariables tree, attach the good-run flag
# and the CV-weight truth columns, add the tune-1 weight and the MC label, then
# pickle the merged frame.
tag = 'Nov_6_run1_fullosc'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run1_fullosc_v1_1_3_fvv.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

83193
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'S

# Run 3 full osc

In [7]:
# Run-3 full-osc MC: load the FinalVertexVariables tree, attach the good-run flag
# and the CV-weight truth columns, add the tune-1 weight and the MC label, then
# pickle the merged frame.
tag = 'Nov_6_run3_fullosc'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run3b_fullosc_v1_1_3_fvv.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run3.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

87251
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'S

# Low E run 1 numu

In [8]:
# Run-1 low-energy BNB nu overlay: load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run1_lowE_numu'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_run1_bnb_nu_overlay_LowE.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_lowE_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

5182
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'Sp

# Low E run 3 numu


In [9]:
# Run-3 low-energy BNB nu overlay: load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run3_lowE_numu'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_run3b_bnb_nu_overlay_LowE.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_lowE_run3.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

5080
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'Sp

## lowE run1 nue

In [10]:
# Run-1 low-energy intrinsic-nue overlay: load the FinalVertexVariables tree,
# attach the good-run flag and the CV-weight truth columns, add the tune-1 weight
# and the MC label, then pickle the merged frame.
tag = 'Nov_6_run1_lowE_nue'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_run1_bnb_intrinsic_nue_overlay_LowE.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_intrinsic_nue_lowE_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

# NOTE(review): the file name keeps the 'numu_' prefix although this is the
# intrinsic-nue sample; left unchanged because readers of the pickle may expect
# this name -- confirm.
df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

10893
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'S

## lowE run3 nue

In [11]:
# Run-3 low-energy intrinsic-nue sample: load the FinalVertexVariables tree,
# attach the good-run flag and the CV-weight truth columns, add the tune-1 weight
# and the MC label, then pickle the merged frame.
tag = 'Nov_6_run3_lowE_nue'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run3b_intrinsic_nue_LowE.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_intrinsic_nue_lowE_run3.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

# NOTE(review): the file name keeps the 'numu_' prefix although this is the
# intrinsic-nue sample; left unchanged because readers of the pickle may expect
# this name -- confirm.
df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

10736
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'S

# Numu run 1

In [12]:
# Run-1 BNB overlay (numu): load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run1_numu'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v28_wctagger_bnboverlay_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
# Quick look at the joined weights (NaN marks events without a CV-weight match).
print(df_bnb['xsec_corr_weight'].values)
# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

161015
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '

# numu run 3

In [13]:
# Run-3 BNB nu overlay (numu): load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run3_numu'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run3.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

314302
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '

## nue run 1

In [14]:
# Run-1 intrinsic-nue sample: load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run1_nue'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v28_wctagger_nueintrinsics_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_bnb))
# NOTE(review): this is the only CV-weight file read through an absolute home
# directory path; the other cells use '../data/...'. Confirm whether
# '../data/weights_forCV_v48_Sep24_intrinsic_nue_run1.root' is the same file.
df_bnb_cvweight = read_root('/home/kmason/pythonscratch_newshowerreco/dllee_unified/1L1PSelection/data/weights_forCV_v48_Sep24_intrinsic_nue_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/nue_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')



190064
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '

## nue run 3

In [15]:
# Run-3 intrinsic-nue overlay: load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run3_nue'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run3b_bnb_intrinsic_nue_overlay_nocrtremerge_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_intrinsic_nue_run3.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/nue_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

100438
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '

## nue run 2

In [16]:
# Run-2 intrinsic-nue overlay: load the FinalVertexVariables tree, attach the
# good-run flag and the CV-weight truth columns, add the tune-1 weight and the MC
# label, then pickle the merged frame.
tag = 'Nov_6_run2_nue'
good_df = pd.read_csv('../data/goodruns_run2.txt') # run2
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v29e_dl_run2_bnb_intrinsics_nue_overlay_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_intrinsic_nue_run2.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: no event is dropped, but a key that occurs more than once on the
# right-hand side duplicates rows -- compare the lengths printed before and after.
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)

# No selection is applied here: both names below are aliases of the merged frame.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/nue_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

# Delete every alias of the merged frame (df_numu_goodruns included); leaving one
# behind keeps the whole frame in memory.
del df_bnb,df_bnb_cvweight,df_numu_goodruns,df_numu_goodruns_precuts

print('Cleaned up')

194423
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '

## ext run1

In [17]:
# Run-1 EXT-BNB sample: load the FinalVertexVariables tree, attach the good-run
# flag and pickle the result. No truth columns or weights are joined here.
tag = 'Nov_6_run1_ext'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join
df_ext = read_root('/media/disk1/kmason/mcc9_v28_wctagger_extbnb_finalbdt.root','dlana/FinalVertexVariables')

print('Loaded files')
print(len(df_ext))

df_ext = df_ext.join(good_df.set_index('run'),on='run')

# No selection is applied here: both names below are aliases of the joined frame.
df_ext_goodruns = df_ext
df_ext_goodruns_precuts = df_ext_goodruns

# Disabled de-duplication (would keep, per run/subrun/event, the row with the largest shower1_E_Y):
# df_ext_goodruns_precuts = df_ext_goodruns_precuts.sort_values('shower1_E_Y',ascending=False).drop_duplicates(RSE).sort_index()
print(len(df_ext_goodruns_precuts))

print('Merged')

df_ext_goodruns_precuts.to_pickle('../data/pickles/ext_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

del df_ext,df_ext_goodruns,df_ext_goodruns_precuts

print('Cleaned up')

Loaded files
64907
64907
Merged
Saved Parquet
Cleaned up


## ext run 3

In [18]:
# Run-3 EXT-BNB sample: load the FinalVertexVariables tree, attach the good-run
# flag and pickle the result. No truth columns or weights are joined here.
tag = 'Nov_6_run3_ext'
good_df = pd.read_csv('../data/goodruns_run3.txt') # run3
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join
df_ext = read_root('/media/disk1/kmason/mcc9_v29e_dl_run3_G1_extbnb_finalbdt.root','dlana/FinalVertexVariables')

print('Loaded files')
print(len(df_ext))

df_ext = df_ext.join(good_df.set_index('run'),on='run')

# No selection is applied here: both names below are aliases of the joined frame.
df_ext_goodruns = df_ext
df_ext_goodruns_precuts = df_ext_goodruns

# Disabled de-duplication (would keep, per run/subrun/event, the row with the largest shower1_E_Y):
# df_ext_goodruns_precuts = df_ext_goodruns_precuts.sort_values('shower1_E_Y',ascending=False).drop_duplicates(RSE).sort_index()
print(len(df_ext_goodruns_precuts))

print('Merged')

df_ext_goodruns_precuts.to_pickle('../data/pickles/ext_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

del df_ext,df_ext_goodruns,df_ext_goodruns_precuts

print('Cleaned up')

Loaded files
102390
102390
Merged
Saved Parquet
Cleaned up


## run1 open data

In [19]:
# Run-1 open data: load the FinalVertexVariables tree, attach the good-run flag
# and the beam-quality columns, then pickle the merged frame.
good_df = pd.read_csv('../data/goodruns_2020.txt')
good_df['good'] = 1  # listed runs are flagged 1; runs absent from the list get NaN after the left join

RSE=['run','subrun','event']

tag = 'Nov_6_open_run1'

df_data = read_root('/media/disk1/kmason/mcc9_v28_wctagger_5e19_finalbdt.root','dlana/FinalVertexVariables')
print (len(df_data))

print('Loaded files')
df_data = df_data.join(good_df.set_index('run'),on='run')
# Re-read here so the cell does not depend on the earlier beam-quality cell having run.
beamq_df = read_root('../data/beamdataquality_remix_bnb5e19.root','bdq')
# Same key as every other per-event join in this notebook.
df_data = df_data.join(beamq_df.set_index(RSE),on=RSE)

# No selection is applied here: this name is an alias of the joined frame.
df_data_goodruns_precuts = df_data
print(len(df_data_goodruns_precuts))

print('Merged')

df_data_goodruns_precuts.to_pickle('../data/pickles/data_goodruns_precuts_%s.pickle'%tag)

# The file written above is a pickle, not parquet.
print('Saved pickle')

del df_data,df_data_goodruns_precuts

print('Cleaned up')

46311
Loaded files
46311
Merged
Saved Parquet
Cleaned up


## High energy

In [20]:
# High-energy data samples: for each (good-run list, tag, FinalVertexVariables
# file) triple, flag the good runs, join the flag onto the events by run number
# and pickle the result under '../data/pickles/data_goodruns_precuts_<tag>.pickle'.
RSE=['run','subrun','event']

# NOTE(review): the run-1 tag starts with 'Nov_6' while the others start with
# 'Oct_26'; kept exactly as they were.
high_energy_samples = (
    ('../data/goodruns_2020.txt', 'Nov_6_run1C_HE',
     '/media/disk1/kmason/mcc9_v29e_dl_run1_C1_bnb_dlfilter_highE_v1_1_3_fvv.root'),
    ('../data/goodruns_run2.txt', 'Oct_26_run2D_HE',
     '/media/disk1/kmason/mcc9_v29e_dl_run2_D2_bnb_dlfilter_highE_v1_1_3_fvv.root'),
    ('../data/goodruns_run2.txt', 'Oct_26_run2E_HE',
     '/media/disk1/kmason/mcc9_v29e_dl_run2_E1_bnb_dlfilter_highE_v1_1_3_fvv.root'),
    ('../data/goodruns_run3.txt', 'Oct_26_run3F_HE',
     '/media/disk1/kmason/mcc9_v29e_dl_run3_F1_bnb_dlfilter_highE_v1_1_3_fvv.root'),
    ('../data/goodruns_run3.txt', 'Oct_26_run3G_HE',
     '/media/disk1/kmason/mcc9_v29e_dl_run3_G1_bnb_dlfilter_highE_v1_1_3_fvv.root'),
)

for goodruns_path, tag, fvv_path in high_energy_samples:
    good_df = pd.read_csv(goodruns_path)
    good_df['good'] = 1
    df_data = read_root(fvv_path,'dlana/FinalVertexVariables')

    df_data_goodruns_precuts = df_data.join(good_df.set_index('run'),on='run')
    df_data_goodruns_precuts.to_pickle('../data/pickles/data_goodruns_precuts_%s.pickle'%tag)
    # Release both frames before the next sample is loaded.
    del df_data,df_data_goodruns_precuts


# numu run 2 Fix

In [21]:
tag = 'Nov_6_run2_numu'
good_df = pd.read_csv('../data/goodruns_run2.txt') # run2
good_df['good'] = 1

RSE=['run','subrun','event']

df_mc = read_root('/media/disk1/kmason/mcc9_v29e_dl_run2_bnb_nu_overlay_finalbdt.root','dlana/FinalVertexVariables')
print(len(df_mc))
df_full_goodruns_precuts = df_mc.join(good_df.set_index('run'),on='run')

# load up FVV and cv  weights file
df_mc_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run2.root')

def bless_cvindex(row, cv_df=None):
    """Return the index label of the CV-weights row that matches an FVV row.

    Candidates are the rows of the weights frame that share run, subrun and
    event with ``row``. When several candidates share that key, the one whose
    ``nu_energy_true`` is closest to ``row['MC_energyInit']`` is chosen.

    Intended for ``DataFrame.apply(bless_cvindex, axis=1)``. Every call scans
    the whole weights frame, so it is slow on large samples.

    Parameters
    ----------
    row : pandas.Series or mapping
        Must provide 'run', 'subrun', 'event' and 'MC_energyInit'.
    cv_df : pandas.DataFrame, optional
        Weights frame to search; needs 'run', 'subrun', 'event' and
        'nu_energy_true' columns. Defaults to the notebook-level
        ``df_mc_cvweight``, so the original one-argument call still works.

    Returns
    -------
    The index label of the selected row of ``cv_df``, or ``np.nan`` when no
    row shares the (run, subrun, event) key.
    """
    if cv_df is None:
        cv_df = df_mc_cvweight

    # Boolean mask instead of a string-built query: no value formatting involved.
    same_rse = (
        (cv_df['run'] == row['run'])
        & (cv_df['subrun'] == row['subrun'])
        & (cv_df['event'] == row['event'])
    )
    candidates = cv_df[same_rse]

    if len(candidates) == 0:
        return np.nan
    if len(candidates) == 1:
        return candidates.index[0]

    # Several rows share the key: take the one closest in true neutrino energy.
    energy_gap = np.abs(candidates['nu_energy_true'].values - row['MC_energyInit'])
    return candidates.index[np.argmin(energy_gap)]

# Compute CVIndex for every FVV row and put it in as the first column. This is the slow
# step; cutting on anything that does not depend on RSE beforehand (good runs,
# PassSimpleCuts, ...) would reduce the number of rows it has to process.
cv_index = df_full_goodruns_precuts.apply(bless_cvindex, axis=1)
df_full_goodruns_precuts.insert(0, 'CVIndex', cv_index)

# Any further preprocessing (BDT scores, derived variables) belongs here.
# Disabled de-duplication step, kept as written:
# df_full_nodupes = df_full_goodruns_precuts.sort_values('bkgBDT_univ',ascending=True).drop_duplicates('CVIndex').sort_index())

# Pull the wanted columns from the CV weights frame; CVIndex holds that frame's index
# labels, so the join is keyed on it. Labeling can then use the joined columns.
cv_columns = [
    'nu_interaction_mode',
    'nu_interaction_type',
    'xsec_corr_weight',
    'spline_weight',
    'nu_interaction_ccnc',
    'nu_pdg',
    'nu_energy_true',
    'nu_L_true',
]
df_full_cved = df_full_goodruns_precuts.join(df_mc_cvweight[cv_columns], on='CVIndex')
print(len(df_full_cved))

df_full_cved.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle' % tag)

143672
143672


## finally data

In [22]:
# Pi0-box data: for every run period, flag good runs, apply the precuts and pickle.
RSE = ['run', 'subrun', 'event']


def pickle_pi0box_data(tag, root_path, good_df):
    """Load one pi0-box data file, keep good-run rows passing the precuts, and pickle them.

    Parameters
    ----------
    tag : str
        Label used in the progress printout and in the output pickle file name.
    root_path : str
        ROOT file holding the 'dlana/FinalVertexVariables' tree.
    good_df : pandas.DataFrame
        Good-runs list with a 'run' column and a constant 'good' == 1 column.

    Prints the row count before and after the selection and writes
    '../data/pickles/data_goodruns_precuts_<tag>.pickle'. Returns None.
    """
    df_data = read_root(root_path, 'dlana/FinalVertexVariables')
    print (tag, "start length",len(df_data))

    # Left join on 'run': runs absent from good_df get NaN in 'good' and are
    # removed by the good==1 selection that follows.
    df_data = df_data.join(good_df.set_index('run'), on='run')  # get good runs flag
    df_data_goodruns = df_data.query("good==1")
    df_data_goodruns_precuts = df_data_goodruns.query("PassPMTPrecut ==1 and PassShowerReco ==1 and InFiducial ==1")
    # Disabled de-duplication step, kept as written:
    # df_data_goodruns_precuts = df_data_goodruns_precuts.sort_values('shower1_E_Y',ascending=False).drop_duplicates(RSE).sort_index()
    print (tag, "end length",len(df_data_goodruns_precuts))
    df_data_goodruns_precuts.to_pickle('../data/pickles/data_goodruns_precuts_%s.pickle' % tag)


# (good-runs list, ((tag, ROOT file), ...)) -- samples that share a good-runs list are grouped.
PI0BOX_SAMPLES = (
    ('../data/goodruns_2020.txt', (  # run1
        ('Nov_6_pi0box_run1', '/media/disk1/kmason/mcc9_v29e_dl_run1_C1_bnb_dlfilter_pi0_v1_1_3_fvv.root'),
    )),
    ('../data/goodruns_run2.txt', (
        ('Nov_6_pi0box_run2D', '/media/disk1/kmason/mcc9_v29e_dl_run2_D2_bnb_dlfilter_pi0_v1_1_3_fvv.root'),
        ('Nov_6_pi0box_run2E', '/media/disk1/kmason/mcc9_v29e_dl_run2_E1_bnb_dlfilter_pi0_v1_1_3_fvv.root'),
    )),
    ('../data/goodruns_run3.txt', (
        ('Nov_6_pi0box_run3F', '/media/disk1/kmason/mcc9_v29e_dl_run3_F1_bnb_dlfilter_pi0_v1_1_3_fvv.root'),
        ('Nov_6_pi0box_run3G', '/media/disk1/kmason/mcc9_v29e_dl_run3_G1_bnb_dlfilter_pi0_v1_1_3_fvv.root'),
    )),
)

# good_df and tag are bound at notebook level, as in the copy-pasted version, so the
# values left behind after this cell are the same (run 3 list, last tag).
for goodruns_csv, samples in PI0BOX_SAMPLES:
    good_df = pd.read_csv(goodruns_csv)
    good_df['good'] = 1
    for tag, root_path in samples:
        pickle_pi0box_data(tag, root_path, good_df)

Nov_6_pi0box_run1 start length 1809
Nov_6_pi0box_run1 end length 1618
Nov_6_pi0box_run2D start length 2186
Nov_6_pi0box_run2D end length 2068
Nov_6_pi0box_run2E start length 701
Nov_6_pi0box_run2E end length 639
Nov_6_pi0box_run3F start length 507
Nov_6_pi0box_run3F end length 473
Nov_6_pi0box_run3G start length 2049
Nov_6_pi0box_run3G end length 1914


## Fake Data Pickles

In [16]:
def pickle_fake_data(tag, root_path, good_df):
    """Load one fake-data file, apply the shower-reco and fiducial precuts, and pickle it.

    Parameters
    ----------
    tag : str
        Label used in the output pickle file name.
    root_path : str
        ROOT file holding the 'dlana/FinalVertexVariables' tree.
    good_df : pandas.DataFrame
        Good-runs list with a 'run' column and a constant 'good' == 1 column.

    Prints the row count before and after the precuts plus progress messages,
    and writes '../data/pickles/data_goodruns_precuts_<tag>.pickle'. Returns None.
    """
    df_data = read_root(root_path, 'dlana/FinalVertexVariables')
    print (len(df_data))

    # The good-runs flag is attached, but no good==1 selection is applied to these samples.
    # NOTE(review): presumably intentional for fake data -- confirm.
    df_data = df_data.join(good_df.set_index('run'), on='run')  # get good runs flag
    print('Loaded files')

    df_data_goodruns_precuts = df_data.query( "PassShowerReco ==1").query("InFiducial ==1  ")
    print(len(df_data_goodruns_precuts))

    df_data_goodruns_precuts.to_pickle('../data/pickles/data_goodruns_precuts_%s.pickle' % tag)
    # The file is written with to_pickle, so the message says pickle (it used to say Parquet).
    print('Saved pickle')


# (tag, ROOT file, good-runs list) for each fake-data sample.
FAKE_DATA_SAMPLES = (
    ('Dec_17_fake5_run1',
     '/media/disk1/kmason/mcc9_v29e_dl_set5_fakedata_run1_pi0_lowBDT_v1_1_3_fvv.root',
     '../data/goodruns_2020.txt'),  # run1
    ('Dec_17_fake4_run1',
     '/media/disk1/kmason/mcc9_v29e_dl_set4_fakedata_run1_pi0_lowBDT_v1_1_3_fvv.root',
     '../data/goodruns_2020.txt'),  # run1
    ('Dec_17_fake4_run3',
     '/media/disk1/kmason/mcc9_v29e_dl_set4_fakedata_run3b_pi0_lowBDT_v1_1_3_fvv.root',
     '../data/goodruns_run3.txt'),  # run3
)

# tag and good_df are bound at notebook level, as in the copy-pasted version, so the
# values left behind after this cell are the same (last tag, run 3 list).
for tag, root_path, goodruns_csv in FAKE_DATA_SAMPLES:
    good_df = pd.read_csv(goodruns_csv)
    good_df['good'] = 1
    pickle_fake_data(tag, root_path, good_df)



11075
Loaded files
10785
Saved Parquet
4481
Loaded files
4339
Saved Parquet
4331
Loaded files
4204
Saved Parquet


# Pi0 files

In [8]:
# CCPi0 run 1 sample: attach the good-runs flag and CV weight columns, add the tune-1
# cross-section weight and the MC label, then pickle.
# NOTE(review): the saved output of this cell shows read_root failing on the CCPi0 file
# ("chain is empty"); check the file before relying on this pickle.
tag = 'Feb_1_run1_CCPi0'

good_df = pd.read_csv('../data/goodruns_2020.txt').assign(good=1)  # run1
df_bnb = read_root(
    '/media/disk1/kmason/mcc9_v40_CCPi0_run1_dlana_stripped.root',
    'dlana/FinalVertexVariables',
)
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_cc_pi0_run1.root')

# Two left joins: first the good-runs flag on 'run', then the CV weight columns on the
# (run, subrun, event) key held in the notebook-level RSE list.
df_bnb = (
    df_bnb
    .join(good_df.set_index('run'), on='run')
    .join(
        df_bnb_cvweight.set_index(RSE)[[
            'nu_energy_true',
            'nu_interaction_mode',
            'nu_interaction_type',
            'xsec_corr_weight',
            'spline_weight',
            'nu_interaction_ccnc',
            'nu_pdg',
        ]],
        on=RSE,
    )
)

# Row-wise derived columns; each is inserted at position 0, so 'mc_label' ends up first.
df_bnb.insert(0, 'xsec_tune1_weight', df_bnb.apply(bless_tune1_reweight, axis=1))
df_bnb.insert(0, 'mc_label', df_bnb.apply(bless_MC_labels, axis=1))

df_bnb.to_pickle('../data/pickles/%s.pickle' % tag)
del df_bnb, df_bnb_cvweight, good_df

# tag = 'Jan_15_run3_CCPi0'
# good_df = pd.read_csv('../data/goodruns_run3.txt') # run1
# good_df['good'] = 1
# df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_run3_CCPi0_pi0_lowBDT_v1_1_3_fvv.root','dlana/FinalVertexVariables')
# print(len(df_bnb))
# df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_cc_pi0_run3.root')
# df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
# df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
# df_bnb.insert(0,'xsec_tune1_weight',df_bnb.apply(bless_tune1_reweight,axis=1))
# df_bnb.insert(0,'mc_label',df_bnb.apply(bless_MC_labels,axis=1))
# df_bnb.to_pickle('../data/pickles/%s.pickle'%tag)
# del df_bnb,df_bnb_cvweight,good_df

# tag = 'Jan_15_run1_NCPi0'
# good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
# good_df['good'] = 1
# df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_run1_NCPi0_pi0_lowBDT_v1_1_3_fvv.root','dlana/FinalVertexVariables')
# print(len(df_bnb))
# df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_nc_pi0_run1.root')
# df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
# df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
# df_bnb.insert(0,'xsec_tune1_weight',df_bnb.apply(bless_tune1_reweight,axis=1))
# df_bnb.insert(0,'mc_label',df_bnb.apply(bless_MC_labels,axis=1))
# df_bnb.to_pickle('../data/pickles/%s.pickle'%tag)
# del df_bnb,df_bnb_cvweight,good_df

# tag = 'Jan_15_run3a_NCPi0'
# good_df = pd.read_csv('../data/goodruns_run3.txt') # run1
# good_df['good'] = 1
# df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_run3a_NCPi0_pi0_lowBDT_v1_1_3_fvv.root','dlana/FinalVertexVariables')
# print(len(df_bnb))
# df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_nc_pi0_run3.root')
# df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
# df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
# df_bnb.insert(0,'xsec_tune1_weight',df_bnb.apply(bless_tune1_reweight,axis=1))
# df_bnb.insert(0,'mc_label',df_bnb.apply(bless_MC_labels,axis=1))
# df_bnb.to_pickle('../data/pickles/%s.pickle'%tag)
# del df_bnb,df_bnb_cvweight,good_df

# tag = 'Jan_15_run3b_NCPi0'
# good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
# good_df['good'] = 1
# df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_run3b_NCPi0_pi0_lowBDT_v1_1_3_fvv.root','dlana/FinalVertexVariables')
# print(len(df_bnb))
# df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_nc_pi0_run3.root')
# df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
# df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
# df_bnb.insert(0,'xsec_tune1_weight',df_bnb.apply(bless_tune1_reweight,axis=1))
# df_bnb.insert(0,'mc_label',df_bnb.apply(bless_MC_labels,axis=1))
# df_bnb.to_pickle('../data/pickles/%s.pickle'%tag)
# del df_bnb,df_bnb_cvweight,good_df


ValueError: chain is empty

Error in <TFile::ReadBuffer>: error reading all requested bytes from file /media/disk1/kmason/mcc9_v40_CCPi0_run1_dlana_stripped.root, got 0 of 2335
Error in <TBranch::GetBasket>: File: /media/disk1/kmason/mcc9_v40_CCPi0_run1_dlana_stripped.root at byte:0, branch:AlphaTB_1e1p, entry:0, badread=1, nerrors=2, basketnumber=0


# high stats run1 numu

In [9]:
# High-stats run 1 numu overlay: attach the good-runs flag and CV weight columns, add the
# tune-1 cross-section weight and the MC label, then pickle.
tag = 'Feb_4_run1_numu'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # constant flag: rows matched in the join below get good == 1

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_run1_pi0_lowBDT_v1_1_3_fvv.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: good-runs flag on 'run', then CV weight columns on (run, subrun, event).
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
print(df_bnb['xsec_corr_weight'].values)

# Both names below are aliases of df_bnb: no good-run or precut selection is applied,
# so the three printed lengths are the same frame's length.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

# Row-wise derived columns; each is inserted at position 0, so 'mc_label' ends up first.
df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file is written with to_pickle, so the message says pickle (it used to say Parquet).
print('Saved pickle')

# NOTE(review): df_numu_goodruns still refers to the same frame after this del, so the
# memory is not released; drop it too if nothing later needs it.
del df_bnb,df_bnb_cvweight,df_numu_goodruns_precuts

print('Cleaned up')

13315
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', 'S

In [7]:
# Run 1 numu overlay (1Mil dlana sample): attach the good-runs flag and CV weight columns,
# add the tune-1 cross-section weight and the MC label, then pickle.
tag = 'Feb_5_run1_numu_dlana'
good_df = pd.read_csv('../data/goodruns_2020.txt') # run1
good_df['good'] = 1  # constant flag: rows matched in the join below get good == 1

RSE=['run','subrun','event']

df_bnb = read_root('/media/disk1/kmason/mcc9_v40_bnb_nu_overlay_1Mil_run1_dlana_fvv_stripped.root','dlana/FinalVertexVariables')
print(len(df_bnb))
df_bnb_cvweight = read_root('../data/weights_forCV_v48_Sep24_bnb_nu_run1.root')
# These display options are global and stay in effect for the rest of the session.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
cols = df_bnb.columns.tolist()
print(cols)


print('Loaded Files')

# Left joins: good-runs flag on 'run', then CV weight columns on (run, subrun, event).
df_bnb = df_bnb.join(good_df.set_index('run'),on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[['nu_energy_true','nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
print(df_bnb['xsec_corr_weight'].values)

# Both names below are aliases of df_bnb: no good-run or precut selection is applied,
# so the three printed lengths are the same frame's length.
df_numu_goodruns = df_bnb
print(len(df_numu_goodruns))

df_numu_goodruns_precuts = df_numu_goodruns
print(len(df_numu_goodruns_precuts))

print('Merged')

# Row-wise derived columns; each is inserted at position 0, so 'mc_label' ends up first.
df_numu_goodruns_precuts.insert(0,'xsec_tune1_weight',df_numu_goodruns_precuts.apply(bless_tune1_reweight,axis=1))

print('Genie Reweighted')

df_numu_goodruns_precuts.insert(0,'mc_label',df_numu_goodruns_precuts.apply(bless_MC_labels,axis=1))
print ('MC Labeled')

df_numu_goodruns_precuts.to_pickle('../data/pickles/numu_goodruns_precuts_%s.pickle'%tag)

# The file is written with to_pickle, so the message says pickle (it used to say Parquet).
print('Saved pickle')

# NOTE(review): df_numu_goodruns still refers to the same frame after this del, so the
# memory is not released; drop it too if nothing later needs it.
del df_bnb,df_bnb_cvweight,df_numu_goodruns_precuts

print('Cleaned up')

456355
['run', 'subrun', 'event', 'vtxid', 'Xreco', 'Yreco', 'Zreco', 'InFiducial', 'AnyReco', 'NTracks', 'N5cmTracks', 'PassSimpleCuts', 'PassShowerReco', 'PassSecondShower', 'FailedBoost', 'FailedBoost_1m1p', 'FailedBoost_1e1p', 'Good3DReco', 'Eta', 'OpenAng', 'Thetas', 'Phis', 'QCorrectionFactorVertex', 'ChargeNearTrunk', 'LongTrackLen', 'ShortTrackLen', 'MaxShrFrac', 'MinShrFrac', 'CCQEEnergyShift_1m1p', 'Enu_1m1p', 'PhiT_1m1p', 'AlphaT_1m1p', 'PT_1m1p', 'PTRat_1m1p', 'BjX_1m1p', 'BjY_1m1p', 'Q2_1m1p', 'Sph_1m1p', 'PzEnu_1m1p', 'Q0_1m1p', 'Q3_1m1p', 'OpenAngB_1m1p', 'ThetasB_1m1p', 'PhisB_1m1p', 'PhiTB_1m1p', 'AlphaTB_1m1p', 'PTB_1m1p', 'BjXB_1m1p', 'BjYB_1m1p', 'Q2B_1m1p', 'SphB_1m1p', 'CCQEEnergyShift_1e1p', 'Enu_1e1p', 'PhiT_1e1p', 'AlphaT_1e1p', 'PT_1e1p', 'PTRat_1e1p', 'BjX_1e1p', 'BjY_1e1p', 'Q2_1e1p', 'Sph_1e1p', 'PzEnu_1e1p', 'Q0_1e1p', 'Q3_1e1p', 'OpenAngB_1e1p', 'ThetasB_1e1p', 'PhisB_1e1p', 'PhiTB_1e1p', 'AlphaTB_1e1p', 'PTB_1e1p', 'BjXB_1e1p', 'BjYB_1e1p', 'Q2B_1e1p', '