In [1]:
%matplotlib inline

from ROOT import TFile,vector,TGraph
import ROOT
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats

Welcome to JupyROOT 6.18/04


In [2]:
def bless_tune1_reweight(row):
    """Return the tune-1 / MCC9 cross-section ratio for a single event row.

    Only rows with ``nu_interaction_mode == 1001`` and ``nu_pdg`` equal to
    +-14 or +-12 are reweighted: the matching module-level graphs
    (``xsec_tune1_graph_*`` and ``xsec_mcc9_graph_*``) are both evaluated at
    ``row['MC_energyInit']`` and their ratio is returned.  Every other row
    gets a weight of 1.0.
    """
    # Anything that is not interaction mode 1001 keeps unit weight.
    if row['nu_interaction_mode'] != 1001:
        return float(1)

    pdg_code = row['nu_pdg']
    if pdg_code in (14, -14):
        tune1_graph, mcc9_graph = xsec_tune1_graph_numu, xsec_mcc9_graph_numu
    elif pdg_code in (12, -12):
        tune1_graph, mcc9_graph = xsec_tune1_graph_nue, xsec_mcc9_graph_nue
    else:
        # Mode 1001 but neither a muon nor an electron (anti)neutrino.
        return float(1)

    return tune1_graph.Eval(row['MC_energyInit']) / mcc9_graph.Eval(row['MC_energyInit'])

def bless_MC_labels(row):
    """Build the ``'<flavour>_<category>'`` truth label for one MC event row.

    ``flavour`` is ``'nue'`` for ``|nu_pdg| == 12``, ``'numu'`` for
    ``|nu_pdg| == 14`` and an empty string otherwise.

    ``category`` is ``'offvtx'`` unless ``0 < MC_scedr <= 5.0``; inside that
    window it is derived from ``nu_interaction_type``: 1001 -> ``'CCQE'``,
    1000 -> ``'MEC'``, a code from the pi-zero list -> ``'pizero'``, a code
    from the charged-pion list -> ``'piplusminus'``, anything else ->
    ``'other'``.
    """
    # The original list carried 1090 twice; membership is unaffected.
    pizero_codes = (1090, 1086, 1080, 1015, 1013, 1011, 1008, 1006, 1004)
    charged_pion_codes = (1085, 1079, 1032, 1017, 1014, 1007, 1005, 1003,
                          1028, 1021, 1016, 1012, 1010, 1009)

    flavour = {12: 'nue', 14: 'numu'}.get(abs(row['nu_pdg']), '')

    if 0 < row['MC_scedr'] <= 5.0:
        # The interaction type is only consulted for in-window events.
        interaction = row['nu_interaction_type']
        if interaction == 1001:
            category = 'CCQE'
        elif interaction == 1000:
            category = 'MEC'
        elif interaction in pizero_codes:
            category = 'pizero'
        elif interaction in charged_pion_codes:
            category = 'piplusminus'
        else:
            category = 'other'
    else:
        category = 'offvtx'

    return '%s_%s' % (flavour, category)

In [3]:
# Good-runs list: every row read from the file is flagged with good == 1, so
# that after a left join on 'run' the selection "good==1" keeps listed runs only.
good_df = pd.read_csv('../data/goodruns_2020.txt').assign(good=1)

# Key columns used to match an event across trees.
RSE = ['run', 'subrun', 'event']

In [4]:
# Both ROOT files are read with the same two keys; the resulting graphs are
# the ones evaluated inside bless_tune1_reweight.
numu_graph_key = 'nu_mu_Ar40/qel_cc_n'
nue_graph_key = 'nu_e_Ar40/qel_cc_n'

# The file handles stay bound to module-level names alongside the graphs.
xsec_mcc9_file = ROOT.TFile('../data/xsec_graphs_mcc9_broken.root')
xsec_mcc9_graph_numu, xsec_mcc9_graph_nue = (
    xsec_mcc9_file.Get(numu_graph_key),
    xsec_mcc9_file.Get(nue_graph_key),
)

xsec_tune1_file = ROOT.TFile('../data/xsec_graphs_tune1.root')
xsec_tune1_graph_numu, xsec_tune1_graph_nue = (
    xsec_tune1_file.Get(numu_graph_key),
    xsec_tune1_file.Get(nue_graph_key),
)

In [5]:
# Beam quality: read the 'bdq' key of the beam-data-quality file. The frame is
# joined onto the data events by (run, subrun, event) in the Data section.
# NOTE(review): the `result==1` selection applied to data presumably comes from
# a column of this tree — confirm.
beamq_df = read_root('../data/beamdataquality_remix_bnb5e19.root','bdq')

In [6]:
# Suffix substituted into the name of every parquet file written below.
tag = 'Feb21-final'

# MC BNB OVERLAY

In [7]:
# Inputs: the BNB-overlay event tree and the matching per-event weights tree.
df_bnb = read_root('../data/mcc9_v13_bnb_overlay/FVV-Prime-bnb-feb13-allclean.root')
df_bnb_cvweight = read_root('../data/mcc9_v13_bnb_overlay/weights_forCV_v33_bnb_nu_run1.root')

print('Loaded Files')

# Columns taken from the weights tree (matched on run/subrun/event).
bnb_weight_columns = [
    'nu_interaction_mode',
    'nu_interaction_type',
    'xsec_corr_weight',
    'spline_weight',
    'nu_interaction_ccnc',
    'nu_pdg',
]

# Left-join the good-run flag first, then the weight/truth columns.
df_bnb = df_bnb.join(good_df.set_index('run'), on='run')
df_bnb = df_bnb.join(df_bnb_cvweight.set_index(RSE)[bnb_weight_columns], on=RSE)

# remove nue cc events
df_numu = df_bnb.query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)')
# Each selection stage keeps its own name because later cells refer to them.
df_numu_goodruns = df_numu.query("good==1")
df_numu_goodruns_precuts = df_numu_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

print('Merged')

# Row-wise tune-1 reweighting; the new column goes to the front of the frame.
df_numu_goodruns_precuts.insert(
    loc=0,
    column='xsec_tune1_weight',
    value=df_numu_goodruns_precuts.apply(bless_tune1_reweight, axis=1),
)

print('Genie Reweighted')

# Row-wise truth label; inserted at position 0, i.e. ahead of the weight column.
df_numu_goodruns_precuts.insert(
    loc=0,
    column='mc_label',
    value=df_numu_goodruns_precuts.apply(bless_MC_labels, axis=1),
)

print ('MC Labeled')

Loaded Files
Merged
Genie Reweighted
MC Labeled


In [8]:
# Write the selected BNB-overlay events; `tag` is substituted into the file name.
df_numu_goodruns_precuts.to_parquet('../data/pickles/numu_goodruns_precuts_%s.parquet'%tag)

print('Saved Parquet')

# Release the large intermediates. df_numu is released as well: nothing further
# down in the notebook reads it. df_numu_goodruns is deliberately kept, because
# the "Get POT for mc" cell merges it with the POT tree.
del df_bnb,df_bnb_cvweight,df_numu,df_numu_goodruns_precuts

print('Cleaned up')

Saved Parquet
Cleaned up


# DIRT

In [9]:
# Inputs: the dirt-overlay event tree and the matching per-event weights tree.
df_dirt = read_root('../data/mcc9_v13_overlay_dirt_run1/FVV-Prime-dirt-Feb21.root')
df_weights_dirt = read_root('../data/mcc9_v13_overlay_dirt_run1/weights_forCV_v33_dirt_nu_run1.root')

print('Loaded Files')

# Columns taken from the weights tree (matched on run/subrun/event).
# NOTE(review): 'nu_interaction_type' is not part of this list, yet
# bless_MC_labels reads it for rows with 0 < MC_scedr <= 5 — confirm the dirt
# event tree already provides that column.
dirt_weight_columns = [
    'nu_interaction_mode',
    'xsec_corr_weight',
    'spline_weight',
    'nu_pdg',
]

# Left-join the good-run flag first, then the weight/truth columns.
df_dirt = df_dirt.join(good_df.set_index('run'), on='run')
df_dirt = df_dirt.join(df_weights_dirt.set_index(RSE)[dirt_weight_columns], on=RSE)

# Each selection stage keeps its own name because the cleanup cell deletes them.
df_dirt_goodruns = df_dirt.query("good==1")
df_dirt_goodruns_precuts = df_dirt_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

print('Merged')

# Row-wise tune-1 weight, then truth label; each goes to position 0, so the
# label ends up ahead of the weight column.
df_dirt_goodruns_precuts.insert(
    loc=0,
    column='xsec_tune1_weight',
    value=df_dirt_goodruns_precuts.apply(bless_tune1_reweight, axis=1),
)
df_dirt_goodruns_precuts.insert(
    loc=0,
    column='mc_label',
    value=df_dirt_goodruns_precuts.apply(bless_MC_labels, axis=1),
)

Loaded Files
Merged


In [10]:
# Write the selected dirt events; `tag` is substituted into the file name.
df_dirt_goodruns_precuts.to_parquet('../data/pickles/dirt_goodruns_precuts_%s.parquet'%tag)

print('Saved parquet')

# Release every dirt intermediate, including the weights frame (the BNB and
# nue cleanup cells drop their weights frames too; nothing below reads it).
del df_dirt,df_weights_dirt,df_dirt_goodruns,df_dirt_goodruns_precuts

print('Cleaned up')

Saved parquet
Cleaned up


# MC NUE OVERLAY

In [11]:
# Inputs: the intrinsic-nue overlay event tree and the matching weights tree.
df_nue = read_root('../data/mcc9_v13_nueintrinsic_overlay/FVV-Prime-nue-Feb13-allclean.root')
df_nue_cvweights = read_root('../data/mcc9_v13_nueintrinsic_overlay/weights_forCV_v33_intrinsic_nue_run1.root')

print('Loaded Files')

# Columns taken from the weights tree (matched on run/subrun/event).
nue_weight_columns = [
    'nu_interaction_mode',
    'nu_interaction_type',
    'xsec_corr_weight',
    'spline_weight',
    'nu_pdg',
]

# Left-join the good-run flag first, then the weight/truth columns.
df_nue = df_nue.join(good_df.set_index('run'), on='run')
df_nue = df_nue.join(df_nue_cvweights.set_index(RSE)[nue_weight_columns], on=RSE)

# Good runs only, then the PMT precut and simple-cuts flags.
df_nue_goodruns = df_nue.query("good==1")
df_nue_goodruns_precuts = df_nue_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

print('Merged')

# Row-wise tune-1 weight, then truth label; each goes to position 0, so the
# label ends up ahead of the weight column.
df_nue_goodruns_precuts.insert(
    loc=0,
    column='xsec_tune1_weight',
    value=df_nue_goodruns_precuts.apply(bless_tune1_reweight, axis=1),
)
df_nue_goodruns_precuts.insert(
    loc=0,
    column='mc_label',
    value=df_nue_goodruns_precuts.apply(bless_MC_labels, axis=1),
)

Loaded Files
Merged


In [12]:
# Write the selected intrinsic-nue events; `tag` is substituted into the file name.
df_nue_goodruns_precuts.to_parquet('../data/pickles/nue_goodruns_precuts_%s.parquet'%tag)

print('Saved Parquet')

# Release every nue intermediate, including df_nue_goodruns (the dirt, EXT and
# data cleanup cells drop their *_goodruns frames too; nothing below reads it).
del df_nue,df_nue_cvweights,df_nue_goodruns,df_nue_goodruns_precuts

print('Cleaned up')

Saved Parquet
Cleaned up


# EXT

In [13]:
# Input: the EXT event tree (no weights tree is joined for this sample).
df_ext = read_root('../data/mcc9jan_extbnb/FVV-Prime-ext-Feb13-allclean.root')

print('Loaded files')

# Left-join the good-run flag onto the events by run number.
df_ext = df_ext.join(good_df.set_index('run'), on='run')

# Good runs only, then the PMT precut and simple-cuts flags.
df_ext_goodruns = df_ext.query("good==1")
df_ext_goodruns_precuts = df_ext_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

print('Merged')

# restrict to 5e19 run range
# (disabled; the frame name below is not defined anywhere in this notebook)
#df_ext_goodruns_pmtprecut = df_ext_goodruns_pmtprecut.query('run>=5119 and run<=5955')

Loaded files
Merged


In [14]:
# Write the selected EXT events; `tag` is substituted into the file name.
df_ext_goodruns_precuts.to_parquet(
    '../data/pickles/ext_goodruns_precuts_%s.parquet' % tag
)

print('Saved Parquet')

# Release all EXT frames now that the parquet file exists.
del df_ext, df_ext_goodruns, df_ext_goodruns_precuts

print('Cleaned up')

Saved Parquet
Cleaned up


# Data

In [15]:
# Input: the beam-on data event tree.
df_data = read_root('../data/mcc9jan_bnb5e19/FVV-Prime-data-Feb21.root')

print('Loaded files')

# Left-join the good-run flag by run, then the beam-quality record by
# run/subrun/event (RSE holds exactly those three key names).
df_data = df_data.join(good_df.set_index('run'), on='run')
df_data = df_data.join(beamq_df.set_index(RSE), on=RSE)

# Good runs with result == 1 first, then the PMT precut and simple-cuts flags.
# NOTE(review): `result` presumably comes from the beam-quality tree — confirm.
df_data_goodruns = df_data.query("good==1 and result==1")
df_data_goodruns_precuts = df_data_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

print('Merged')

Loaded files
Merged


In [16]:
# Write the selected data events; `tag` is substituted into the file name.
df_data_goodruns_precuts.to_parquet('../data/pickles/data_goodruns_precuts_%s.parquet'%tag)

print('Saved Parquet')

# Release the large frames. df_data_goodruns is deliberately NOT deleted: the
# "Get list of RS" cell further down calls df_data_goodruns.drop_duplicates(...)
# and would raise NameError on a top-to-bottom run if it were removed here.
del df_data,df_data_goodruns_precuts

print('Cleaned up')

Saved Parquet
Cleaned up


# Get POT for mc

In [None]:
# Key columns identifying a subrun.
RS = ['run', 'subrun']

# Read run/subrun/pot from 'pot_tree'.
df_pot = read_root(
    '../data/mcc9_v13_bnb_overlay/old/pot_scrape.root',
    'pot_tree',
    columns=['run', 'subrun','pot'],
)

# Inner-merge with the good-run BNB events and keep the first row per
# (run, subrun), so each subrun contributes its POT exactly once to the sum.
df_pot = pd.merge(df_numu_goodruns, df_pot, on=RS).drop_duplicates(RS)
print(df_pot['pot'].values.sum())

In [None]:
# Get list of RS
# Writes one "run subrun" line per unique (run, subrun) pair of the good-run
# data events. Needs df_data_goodruns from the Data section to still be
# defined (i.e. not removed by a cleanup `del`).
RS = ['run', 'subrun']
df_rs = df_data_goodruns.drop_duplicates(RS)

# The context manager closes the file even if a write fails part-way through.
with open('../data/RS_data.txt', 'w') as rs_file:
    for run_number, subrun_number in zip(df_rs['run'].values, df_rs['subrun'].values):
        rs_file.write('%i %i\n' % (run_number, subrun_number))

In [None]:
def bless_scedr(row):
    """Return the 3D distance between the reco and parentSCE positions of a row.

    Computes ``sqrt(dx^2 + dy^2 + dz^2)`` where each component is
    ``row['<axis>reco'] - row['parentSCE<axis>']`` for X, Y and Z.
    """
    axis_pairs = (
        ('Xreco', 'parentSCEX'),
        ('Yreco', 'parentSCEY'),
        ('Zreco', 'parentSCEZ'),
    )
    offsets = [row[reco_key] - row[parent_key] for reco_key, parent_key in axis_pairs]
    return np.sqrt(sum(delta * delta for delta in offsets))


In [None]:
def bless_proton_dedx(row):
    """Return ``row['Proton_Edep']`` divided by ``row['Proton_TrackLength']``.

    The track length is converted with ``float()`` before the division.
    """
    deposited_energy = row['Proton_Edep']
    track_length = float(row['Proton_TrackLength'])
    return deposited_energy / track_length

def bless_proton_dedx_recombo(row):
    """Convert ``row['Proton_dQdx']`` into dE/dx with an exponential formula.

    Evaluates ``(exp(dQdx * k * Wion) - alpha) / k`` with
    ``k = betap / (Rho * Efield)``, using the constants hard-coded below.
    """
    # NOTE(review): looks like an inverted modified-box recombination
    # correction — confirm the constants and their units with the author.
    alpha, beta_prime = 0.921969, 0.183592
    density, e_field = 1.383, 0.273
    w_ion = 23.6e-6

    # Shared factor of the exponent and the denominator.
    k = beta_prime / (density * e_field)

    return (np.exp(row['Proton_dQdx'] * k * w_ion) - alpha) / k

In [None]:
# restrict to 5e19 run range
#df_nue_goodruns_pmtprecut = df_nue_goodruns_pmtprecut.query('run>=5119 and run<=5955')