In [1]:
%matplotlib inline
import ROOT
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
#from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats
 
from datetime import date

datafolder = '/media/disk1/kmason/detvars_highstats'
auxfolder = '../../data'
dumpfolder = '.'

RSE  = ['run','subrun','event']
RSEV = ['run','subrun','event','vtxid']
RSECV = ['run','subrun','event',"isCV"]

Welcome to JupyROOT 6.14/08


In [2]:
def bless_MC_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if not (row['MC_nproton']==1 and row['MC_nlepton']==1):
        return 'nLmP'
    elif not 0 < row['MC_scedr'] <= 5.0:
        return 'offvtx'
    elif not abs((row['MC_energyInit']-row['Enu_1m1p'])/row['MC_energyInit']) < 0.2:
        return 'badreco'    
    else:
        if row['nu_interaction_type'] == 1001:
            mclabel = 'CCQE'
        elif row['nu_interaction_type'] == 1000:
            mclabel = 'MEC'
        elif row['nu_interaction_type'] in pizero:
            mclabel = 'pizero'
        elif row['nu_interaction_type'] in piplusminus:
            mclabel = 'piplusminus' 
        else:
            mclabel = 'other'
    return '%s_%s'%(intlabel,mclabel)

def bless_int_labels(row):
    mclabel = ''
    intlabel = ''
    parentlabel = ''
    pizero = [1090,1086,1090,1080,1015,1013,1011,1008,1006,1004]
    piplusminus = [1085,1079,1032,1017,1014,1007,1005,1003,1028,1021,1016,1012,1010,1009]
        
    if abs(row['nu_pdg']) == 12:
        intlabel = 'nue'
    elif abs(row['nu_pdg']) == 14:
        intlabel = 'numu'
        
    if row['nu_interaction_type'] == 1001:
        mclabel = 'CCQE'
    elif row['nu_interaction_type'] == 1000:
        mclabel = 'MEC'
    elif row['nu_interaction_type'] in pizero:
        mclabel = 'pizero'
    elif row['nu_interaction_type'] in piplusminus:
        mclabel = 'piplusminus' 
    else:
        mclabel = 'other'
           
    return '%s_%s'%(intlabel,mclabel)



In [3]:
tag = date.today()

# All Precuts
precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0'
pmtprecuts = 'TotPE > 20 and PorchTotPE < 20'
s_precut = precuts + ' and ' + pmtprecuts

In [4]:
xsec_mcc9_file = ROOT.TFile('../../data/xsec_graphs_tune1.root')
xsec_mcc9_graph_numu = xsec_mcc9_file.Get('../nu_mu_Ar40/qel_cc_n')
xsec_mcc9_graph_nue = xsec_mcc9_file.Get('../nu_e_Ar40/qel_cc_n')
xsec_tune1_file = ROOT.TFile('../../data/xsec_graphs_mcc9_v304.root')
xsec_tune1_graph_numu = xsec_tune1_file.Get('nu_mu_Ar40/qel_cc_n')
xsec_tune1_graph_nue = xsec_tune1_file.Get('nu_e_Ar40/qel_cc_n')

# Time to load the good runs list
good_run1_df = pd.read_csv('../../data/goodruns_2020.txt')
good_run2_df = pd.read_csv('../../data/goodruns_run2.txt')
good_run3_df = pd.read_csv('../../data/goodruns_run3.txt')

good_run1_df['good'] = 1
good_run2_df['good'] = 1
good_run3_df['good'] = 1

In [5]:
def proc_detvar_mc(df_dlana,df_cv,df_goodrun,df_wgts,POTCV,POTDV):
    
    df_cv['isCV'] = True
    df_dlana['isCV'] = False
    df_cv['POT']  = POTCV
    df_dlana['POT'] = POTDV
    df_full = pd.concat((df_cv,df_dlana))
    print('df_full = %d  cv = %d , dv = %d'%(df_full.shape[0],df_cv.shape[0],df_dlana.shape[0]))
    df_full = df_full.join(df_wgts.set_index(RSE)[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg','nu_energy_true']],on=RSE)
    #df_full = pd.concat((df_dlana[df_dlana[RSE].isin(df_cv[RSE])],df_cv[df_cv[RSE].isin(df_dlana[RSE])])) 
    df_full_wGoodruns = df_full.join(df_goodrun.set_index('run'),on='run')
    df_full_goodruns = df_full_wGoodruns.query('good == 1')

    df_full_goodruns.insert(0,'PassPrecutsPi0',df_full_goodruns.apply(passPrecut,axis=1)) 
    
    #    df_full_goodruns = df_full_goodruns.query(s_precut)    # apply precuts
    
    
#    if(isMC):
#        df_full_goodruns.insert(0,'mc_label',df_full_goodruns.apply(bless_MC_labels,axis=1))
#        df_full_goodruns.insert(0,'int_label',df_full_goodruns.apply(bless_int_labels,axis=1))
       
    # add a bunch of helpful variables!
    df_full_goodruns.insert(0,'MPID_eminus',[ef.max() for ef in df_full_goodruns['EminusPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_muon',[ef.max() for ef in df_full_goodruns['MuonPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_proton',[ef.max() for ef in df_full_goodruns['ProtonPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_gamma',[ef.max() for ef in df_full_goodruns['GammaPID_int_v'].values])
    df_full_goodruns.insert(0,'MPID_pion',[ef.max() for ef in df_full_goodruns['PionPID_int_v'].values])
    df_full_goodruns.insert(0,'Lepton_CosTheta',np.cos(df_full_goodruns['Lepton_ThetaReco'].values).tolist())
    df_full_goodruns.insert(0,'Proton_CosTheta',np.cos(df_full_goodruns['Proton_ThetaReco'].values).tolist())
    
    print('df_full_goodruns = %d  cv = %d , dv = %d'%(df_full_goodruns.shape[0],df_full_goodruns.query("isCV==True").shape[0],df_full_goodruns.query("isCV==False").shape[0]))

    return df_full_goodruns

def passPrecut(row):
    #orthogonalcut = 'MaxShrFrac < .2'
    #precuts ='PassSimpleCuts == 1 and ChargeNearTrunk > 0 and FailedBoost != 1 and OpenAng > .5 and '
    #s_precut = orthogonalcut + ' and ' + precuts
    return (row['PassSimpleCuts'] == 1 and row['ChargeNearTrunk'] > 0 )


In [6]:
# functions I need to add in the pi0 variables
def showerE_v2(df):
    energy1 = []
    energy2 = []
    currentslope = 0.013456
    currentbias = 2.06955
    for idx in range(len(df)):
        oldE1 = df['shower1_E_Y'].values[idx]
        oldE2 = df['shower2_E_Y'].values[idx]
        A1 = (oldE1-currentbias)/currentslope
        A2 = (oldE2-currentbias)/currentslope
        energy1.append(A1*0.01255796)
        energy2.append(A2*0.01255796)
    return energy1, energy2

# delta mass functions
def gamma4vectorshower1(df,energy_v):
    allvectors = []
    for idx in range(0,len(df)):
        energy = energy_v[idx]
        xdir = df['shower1_dir_3d_X'].values[idx]
        ydir = df['shower1_dir_3d_Y'].values[idx]
        zdir = df['shower1_dir_3d_Z'].values[idx]
        #check magnitude
        mag = sqrt(xdir*xdir+ydir*ydir+zdir*zdir)
        if (mag != 0):
            singlevector = [energy,(xdir/mag)*energy,(ydir/mag)*energy,(zdir/mag)*energy]
        else:
            singlevector = [-9999,-9999,-9999,-9999]
        allvectors.append(singlevector)
    return allvectors

def gamma4vectorshower2(df,energy_v):
    allvectors = []
    for idx in range(0,len(df)):
        energy = energy_v[idx]
        xdir = df['shower2_dir_3d_X'].values[idx]
        ydir = df['shower2_dir_3d_Y'].values[idx]
        zdir = df['shower2_dir_3d_Z'].values[idx]
        #check magnitude
        mag = sqrt(xdir*xdir+ydir*ydir+zdir*zdir)
        if (mag != 0):
            singlevector = [energy,(xdir/mag)*energy,(ydir/mag)*energy,(zdir/mag)*energy]
        else:
            singlevector = [-9999,-9999,-9999,-9999]
        allvectors.append(singlevector)
    return allvectors

# now turn them into pi0 vectors
def pi04vector(shower1,shower2):
    allvectors = []
    for evt in range(len(shower1)):
        shower1_v = shower1[evt]
        shower2_v = shower2[evt]
        if(shower1_v[0] > -9999 and shower1_v[0] > -9999):
            singlevector = [shower1_v[0]+shower2_v[0],shower1_v[1]+shower2_v[1],shower1_v[2]+shower2_v[2],shower1_v[3]+shower2_v[3]]
        else:
            singlevector = [-9999,-9999,-9999,-9999]
        allvectors.append(singlevector)
    return allvectors

# make proton 4 vector
def proton4vector(df):
    allvectors = []
    for idx in range(0,len(df)):
        energy = df['Proton_Edep'].values[idx]+938
        momentum = 0
        if (energy>0):
            momentum = sqrt(energy*energy -(938*938))
        theta = df['Proton_ThetaReco'].values[idx]
        phi = df['Proton_PhiReco'].values[idx]
        xdir = sin(theta)*cos(phi)
        ydir = sin(theta)*sin(phi)
        zdir = cos(theta)
        #check magnitude
        mag = sqrt(xdir*xdir+ydir*ydir+zdir*zdir)
        singlevector = [energy,(xdir/mag)*momentum,(ydir/mag)*momentum,(zdir/mag)*momentum]
        allvectors.append(singlevector)
    return allvectors

def delta4vector(proton,pi0):
    allvectors = []
    for evt in range(len(proton)):
        energy = proton[evt][0]+pi0[evt][0]
        momx = proton[evt][1]+pi0[evt][1]
        momy = proton[evt][2]+pi0[evt][2]
        momz = proton[evt][3]+pi0[evt][3]
        if (pi0[evt][0] >-9999):
            singlevector = [energy,momx,momy,momz]
        else:
            singlevector = [-9999,-9999,-9999,-9999]
        allvectors.append(singlevector)
    return allvectors

# get delta rest mass

def deltarestmass(delta):
    restmass = 0
    mass_v=[]
    for evt in range(len(delta)):
        energy = delta[evt][0]
        momx = delta[evt][1]
        momy = delta[evt][2]
        momz = delta[evt][3]
        mom = 0
        if (momx >0 or momy>0 or momz>0):
            mom = sqrt(momx*momx+momy*momy+momz*momz)
        if (energy >-9999):
            mass = sqrt(energy*energy-mom*mom)
        else:
            mass = -9999
        mass_v.append(mass)
    return mass_v

#new pi0 mass
def pi0mass_v2(df):
    mass_v=[]
    for idx in range(len(df)):
        oldm = df['_pi0mass'].values[idx]
        if oldm >0:
            E1 = df['shower1_E_Y_new'].values[idx]
            E2 = df['shower2_E_Y_new'].values[idx]
            alpha = df["_shower_alpha"].values[idx]
            C = 4*sin(alpha/2.0)*sin(alpha/2.0)
            newmass = sqrt(C*E1*E2)
        else:
            newmass = oldm
        
        mass_v.append(newmass)
   
    return mass_v

# reco momentum and energy
def recomomentum_v2(df):
    momentum_v = []
    for x in range(0,len(df)):
        E1 = df["shower1_E_Y_new"].values[x]
        E2 = df["shower2_E_Y_new"].values[x]
        x1 = df['shower1_dir_3d_X'].values[x]
        y1 = df['shower1_dir_3d_Y'].values[x]
        z1 = df['shower1_dir_3d_Z'].values[x]
        x2 = df['shower2_dir_3d_X'].values[x]
        y2 = df['shower2_dir_3d_Y'].values[x]
        z2 = df['shower2_dir_3d_Z'].values[x]
        #check magnitude
        mag1 = sqrt(x1*x1+y1*y1+z1*z1)
        mag2 = sqrt(x2*x2+y2*y2+z2*z2)
        if E1<0 or E2<0 or mag1 <=0 or mag2 <=0:
            momentum_v.append(-9999)
        else:
            singlevector1 = [(x1/mag1)*E1,(y1/mag1)*E1,(z1/mag1)*E1]
            singlevector2 = [(x2/mag2)*E2,(y2/mag2)*E2,(z2/mag2)*E2]
            momentumpi0 = [singlevector1[0]+singlevector2[0],singlevector1[1]+singlevector2[1],singlevector1[2]+singlevector2[2]]
            momentum = sqrt(momentumpi0[0]**2+momentumpi0[1]**2+momentumpi0[2]**2)
            momentum_v.append(momentum)
    return momentum_v


def recopi0energy_v2(df):
    var = []
    for x in range(len(df)):
        m = df["_pi0mass_new"].values[x]
        E1 = df["shower1_E_Y_new"].values[x]
        E2 = df["shower2_E_Y_new"].values[x]
        theta = df['_shower_alpha'].values[x]
        if (m<=0):
            var.append(-9999)
        else:
            alpha = abs(E1-E2)/(E1+E2)
            Energy = m*sqrt(2.0/((1-alpha**2)*(1-cos(theta))))
            var.append(Energy)
    return var

# MPID values
def get_ge(df):
    ge  = []
    ge_norm = []
    muon= []
    
    for idx in range(0,len(df)):
        g = df['GammaPID_pix_v'].values[idx][2]
        e = df['EminusPID_pix_v'].values[idx][2]
        muon.append(df['MuonPID_pix_v'].values[idx][2])
        if e>0:
            ge.append(np.log(g/e))
            ge_norm.append(g/(e+g))
        else:
            ge.append(-1.0)
            ge_norm.append(-1.0)
        
    
    return ge,ge_norm,muon


def addPi0Variables(df):
#     this main function adds in all the variables needed, calling all helper functions.
    #new shower energy
    E1,E2 = showerE_v2(df)
    df['shower1_E_Y_new'] = E1
    df['shower2_E_Y_new'] = E2
    shower1 = gamma4vectorshower1(df,df['shower1_E_Y_new'].values)
    shower2 = gamma4vectorshower2(df,df['shower2_E_Y_new'].values)
    pi0 = pi04vector(shower1,shower2)
    proton = proton4vector(df)
    delta = delta4vector(proton,pi0)
    mass = deltarestmass(delta)
    df['DeltaMass_new'] = mass
    newm = pi0mass_v2(df)
    df['_pi0mass_new'] = newm
    recom2= recomomentum_v2(df)
    recoenergy2 = recopi0energy_v2(df)
    df['pi0_energy_reco_new'] = recoenergy2
    df['pi0_momentum_reco_new'] = recom2
    ge,ge_norm,muon= get_ge(df)
    df['MPID_ge'] = ge
    df['MPID_ge_norm']= ge_norm
    df['MPID_muon_new']= muon
    
    return df


In [7]:
run = 'run3'
run3POT = {
    "cv_500": 5.79534847421e20,
    "cv_1000": 8.98773223801e+20,
    "LYRayleigh": 1.30830317061e+21,
    "wiremodThetaXZ" : 1.29697772584e+21,
    "wiremodThetaYZ" : 1.26402704519e+21,
    "wiremodYZ" : 1.31097060608e+21,
    "wiremodX" : 1.30915319237e+21,
    "LYAtt" : 1.21283308703e+21,
    "LYdown" :1.21808316832e+21,
    "SCE" : 5.89862945464e20,
    "recomb" :6.37062455184e20
}
df_mc_cvweight_500 = read_root('../../data/weights_forCV_v48_Sep24_bnb_nu_DetVar_run3.root')
df_mc_cvweight_1000 = read_root('../../data/weights_forCV_v48_Sep24_bnb_nu_run3.root')

df_cv_500 = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_CV_v1_1_3_fvv_pi0_lowBDT.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_cv_500)
print(df_cv_500.shape[0])
print('done cv500')


df_cv_1000 = read_root('%s/../mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_cv_1000)
print(df_cv_1000.shape[0])
print('done cv1000')

s_data = 'wiremodThetaXZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaXZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodThetaYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaYZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModYZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodX'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModX_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'LYRayleigh'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYRayleigh_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'LYAtt'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYAttenuation_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'LYdown'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_LYDown_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run3POT["cv_1000"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])


s_data = 'SCE'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_SCE_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run3_df,df_mc_cvweight_500,run3POT["cv_500"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print(df_bnb_wCV_wPass_wProc.shape[0])
print('done %s'%s_data)

s_data = 'recomb'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3_detvar_Recomb2_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run3_df,df_mc_cvweight_500,run3POT["cv_500"],run3POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])
df_bnb_wCV_wPass_wProc
print('----done all----')

5823
done cv500
314302
done cv1000
df_full = 325834  cv = 314302 , dv = 11532
df_full_goodruns = 326568  cv = 315002 , dv = 11566
done wiremodThetaXZ
326568
df_full = 327163  cv = 314302 , dv = 12861
df_full_goodruns = 327883  cv = 315002 , dv = 12881
done wiremodThetaYZ
327883
df_full = 327134  cv = 314302 , dv = 12832
df_full_goodruns = 327860  cv = 315002 , dv = 12858
done wiremodYZ
327860
df_full = 327195  cv = 314302 , dv = 12893
df_full_goodruns = 327918  cv = 315002 , dv = 12916
done wiremodX
327918
df_full = 326723  cv = 314302 , dv = 12421
df_full_goodruns = 327469  cv = 315002 , dv = 12467
done LYRayleigh
327469
df_full = 325247  cv = 314302 , dv = 10945
df_full_goodruns = 325976  cv = 315002 , dv = 10974
done LYAtt
325976
df_full = 325016  cv = 314302 , dv = 10714
df_full_goodruns = 325739  cv = 315002 , dv = 10737
done LYdown
325739
df_full = 11684  cv = 5823 , dv = 5861
df_full_goodruns = 11684  cv = 5823 , dv = 5861
11684
done SCE
df_full = 12348  cv = 5823 , dv = 6525
df

In [8]:
run = 'run1'
run1POT = {
    "cv_500": 6.0936273419e20,
    "cv_1000": 8.98773223801e+20,
    "LYRayleigh": 6.74276740522e20,
    "wiremodThetaXZ" : 1.29697772584e+21,
    "wiremodThetaYZ" : 1.26402704519e+21,
    "wiremodYZ" : 1.31097060608e+21,
    "wiremodX" : 1.30915319237e+21,
    "LYdown" :5.94452048286000000000e20,
    "SCE" : 6.21993777143e20,
    "recomb" :6.27484668519e20
}


df_mc_cvweight = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_DetVar_run1.root'%auxfolder)
df_mc_cvweight_1000 = read_root('%s/weights_forCV_v48_Sep24_bnb_nu_run3.root'%auxfolder)

df_cv_500 = read_root('%s/mcc9_bnb_nu_overlay_CV_run1_500k_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_cv_500)
print('done cv500')

df_cv_1000 = read_root('%s/../mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_finalbdt.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_cv_1000)
print(df_cv_1000.shape[0])
print('done cv1000')


# ---------------------------------------- Create Wiremod compared to run3 and add to run1 -----------------------
s_data = 'wiremodThetaXZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaXZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run1POT["cv_1000"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodThetaYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModThetaYZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run1POT["cv_1000"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodYZ'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModYZ_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run1POT["cv_1000"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'wiremodX'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run3b_detvar_WireModX_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_1000,good_run3_df,df_mc_cvweight_1000,run1POT["cv_1000"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])



s_data = 'LYRayleigh'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_LYRayleigh_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight,run1POT["cv_500"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

s_data = 'LYdown'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_LYDown_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight,run1POT["cv_500"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])



s_data = 'SCE'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_SCE_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight,run1POT["cv_500"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print(df_bnb_wCV_wPass_wProc.shape[0])
print('done %s'%s_data)

s_data = 'recomb'
df_bnb = read_root('%s/mcc9_v40_bnb_nu_overlay_run1_detvar_Recomb2_pi0_lowBDT_v1_1_3_fvv.root'%datafolder,'dlana/FinalVertexVariables')
addPi0Variables(df_bnb)
df_bnb_wCV_wPass_wProc = proc_detvar_mc(df_bnb,df_cv_500,good_run1_df,df_mc_cvweight,run1POT["cv_500"],run1POT[s_data])
df_bnb_wCV_wPass_wProc.to_parquet('%s/pickles/%s_%s_%s.parquet'%(datafolder,s_data,run,tag))
print('done %s'%s_data)
print(df_bnb_wCV_wPass_wProc.shape[0])

print('----done all----')

done cv500
314302
done cv1000
df_full = 325834  cv = 314302 , dv = 11532
df_full_goodruns = 326568  cv = 315002 , dv = 11566
done wiremodThetaXZ
326568
df_full = 327163  cv = 314302 , dv = 12861
df_full_goodruns = 327883  cv = 315002 , dv = 12881
done wiremodThetaYZ
327883
df_full = 327134  cv = 314302 , dv = 12832
df_full_goodruns = 327860  cv = 315002 , dv = 12858
done wiremodYZ
327860
df_full = 327195  cv = 314302 , dv = 12893
df_full_goodruns = 327918  cv = 315002 , dv = 12916
done wiremodX
327918
df_full = 13066  cv = 6200 , dv = 6866
df_full_goodruns = 13066  cv = 6200 , dv = 6866
done LYRayleigh
13066
df_full = 11936  cv = 6200 , dv = 5736
df_full_goodruns = 11936  cv = 6200 , dv = 5736
done LYdown
11936
df_full = 12631  cv = 6200 , dv = 6431
df_full_goodruns = 12631  cv = 6200 , dv = 6431
12631
done SCE
df_full = 13079  cv = 6200 , dv = 6879
df_full_goodruns = 13079  cv = 6200 , dv = 6879
done recomb
13079
----done all----
