In [20]:
%matplotlib inline

from ROOT import TFile,vector,TGraph
import ROOT
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from numpy import mean
from math import sqrt,acos,cos,sin,pi,exp,log,isnan,atan2
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from numpy import asarray
from root_pandas import read_root
from matplotlib import gridspec
from scipy import stats

In [21]:
def bless_MC_labels(row):
    """Return the truth-category label for one simulated event.

    Parameters
    ----------
    row : mapping-like (e.g. one pandas row)
        Must provide 'nu_pdg', 'MC_nproton', 'MC_nlepton', 'MC_scedr',
        'MC_energyInit', 'Enu_1m1p' and 'nu_interaction_type'.

    Returns
    -------
    str
        'nLmP'    if the event does not have exactly one proton and one lepton,
        'offvtx'  if MC_scedr is not in the interval (0, 5.0],
        'badreco' if |MC_energyInit - Enu_1m1p| / MC_energyInit is not < 0.2,
        otherwise '<flavour>_<mode>' where flavour is 'nue' (|pdg| == 12),
        'numu' (|pdg| == 14) or '' for anything else, and mode is one of
        'CCQE', 'MEC', 'pizero', 'piplusminus', 'other'.
    """
    # Interaction-type codes grouped by final-state category; checked in order.
    # NOTE(review): 1090 was listed twice in the pizero codes of the original
    # list -- harmless for membership tests, but possibly a typo for another code.
    mode_table = (
        ('CCQE', (1001,)),
        ('MEC', (1000,)),
        ('pizero', (1090, 1086, 1090, 1080, 1015, 1013, 1011, 1008, 1006, 1004)),
        ('piplusminus', (1085, 1079, 1032, 1017, 1014, 1007, 1005, 1003,
                         1028, 1021, 1016, 1012, 1010, 1009)),
    )

    # Neutrino flavour prefix (stays empty for any other PDG code).
    flavour = ''
    if abs(row['nu_pdg']) == 12:
        flavour = 'nue'
    elif abs(row['nu_pdg']) == 14:
        flavour = 'numu'

    # Guard clauses: topology, vertex distance, then energy resolution.
    if row['MC_nproton'] != 1 or row['MC_nlepton'] != 1:
        return 'nLmP'
    if not (0 < row['MC_scedr'] <= 5.0):
        return 'offvtx'
    true_energy = row['MC_energyInit']
    if not (abs((true_energy - row['Enu_1m1p']) / true_energy) < 0.2):
        return 'badreco'

    # First category whose code list contains the interaction type wins.
    interaction = row['nu_interaction_type']
    for mode, codes in mode_table:
        if interaction in codes:
            break
    else:
        mode = 'other'

    return '%s_%s' % (flavour, mode)

In [7]:
# Columns that identify a unique event; used as join and de-duplication key.
RSE=['run','subrun','event']

def proc_df_mc(df_dlana,df_wgts,df_goodrun):
    """Apply good-run, precut and de-duplication steps to a selection frame.

    Parameters
    ----------
    df_dlana : pandas.DataFrame
        Selection output. Must contain the RSE columns, 'PassPMTPrecut',
        'PassSimpleCuts' and 'BDTscore_1mu1p_cosmic'.
    df_wgts : pandas.DataFrame, '' or None
        Per-event weights / truth info keyed by RSE. Pass '' (the historical
        sentinel) or None when there are no weights; in that case no weight
        columns are joined and no 'mc_label' column is added.
    df_goodrun : pandas.DataFrame
        Good-run list with a 'run' column and a 'good' column equal to 1.

    Returns
    -------
    pandas.DataFrame
        Rows from good runs passing both precuts, one row per (run, subrun,
        event) -- the one with the highest 'BDTscore_1mu1p_cosmic' -- in
        original index order.
    """
    # Decide once whether weights were supplied. The old `df_wgts != ''` test
    # compared a DataFrame element-wise and then took its truth value, which
    # raises "ValueError: The truth value of a DataFrame is ambiguous".
    has_weights = not (df_wgts is None or (isinstance(df_wgts, str) and df_wgts == ''))

    df_full = df_dlana.join(df_goodrun.set_index('run'),on='run')
    if has_weights:
        weight_columns = ['nu_interaction_mode','nu_interaction_type','xsec_corr_weight',
                          'spline_weight','nu_interaction_ccnc','nu_pdg']
        df_full = df_full.join(df_wgts.set_index(RSE)[weight_columns],on=RSE)

    # Runs missing from the good-run list get NaN in 'good' and are dropped here.
    df_full_goodruns = df_full.query('good==1')
    df_full_goodruns_precuts = df_full_goodruns.query('PassPMTPrecut==1 and PassSimpleCuts==1')
    if has_weights:
        # Truth labelling needs the columns that came from the weights frame.
        df_full_goodruns_precuts.insert(0,'mc_label',df_full_goodruns_precuts.apply(bless_MC_labels,axis=1))

    # Sort best-cosmic-score first so drop_duplicates keeps that candidate per event.
    df_full_nodupes = df_full_goodruns_precuts.sort_values('BDTscore_1mu1p_cosmic',ascending=False).drop_duplicates(RSE).sort_index()

    return df_full_nodupes

In [4]:
# Load the good-run lists and tag every listed run with good=1.
# After a join on 'run', runs absent from a list carry NaN in 'good',
# so a later query('good==1') keeps only the listed runs.
good_run1_df = pd.read_csv('../data/goodruns_2020.txt').assign(good=1)
good_run3_df = pd.read_csv('../data/goodruns_2020_run3.txt').assign(good=1)

In [5]:
# Beam quality: read the 'bdq' tree of the beam-data-quality file.
# It is joined to data on (run, subrun, event) further down, where rows are
# kept only if its 'result' column equals 1.
beamq_df = read_root('../data/beamdataquality_remix_bnb5e19.root','bdq')

In [6]:
# Suffix inserted into the name of every parquet file written below.
tag = 'May1'

# MC BNB OVERLAY

##  Run1

In [13]:
# Run1 BNB overlay: selection tree plus the matching weights file.
df_bnb = read_root('../data/bnb_overlay/mcc9_v28_wctagger_bnboverlay_stripped.root','FinalVertexVariables')
df_bnb_cvweight = read_root('../data/bnb_overlay/weights_forCV_v40_bnb_nu_run1.root')

# Standard MC processing, then drop rows with nu_interaction_ccnc==0 and |nu_pdg|==12.
# Original note: "cut out nue ccqes". The cut does not look at the interaction
# type, so it presumably removes every CC nue event -- TODO confirm.
df_nodupes = (
    proc_df_mc(df_bnb,df_bnb_cvweight,good_run1_df)
    .query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)')
)

df_nodupes.to_parquet('../data/pickles/numu_run1_nodupes%s.parquet'%tag)

In [14]:
# Run1 low-energy BNB overlay: selection tree plus the matching weights file.
df_bnb = read_root('../data/bnb_overlay/mcc9_v29e_run1_bnb_nu_overlay_LowE.root','dlana/FinalVertexVariables')
df_bnb_cvweight = read_root('../data/bnb_overlay/weights_forCV_v40_bnb_nu_lowE_run1.root')

# Standard MC processing, then drop rows with nu_interaction_ccnc==0 and |nu_pdg|==12.
# Original note: "cut out nue ccqes". The cut does not look at the interaction
# type, so it presumably removes every CC nue event -- TODO confirm.
df_nodupes = (
    proc_df_mc(df_bnb,df_bnb_cvweight,good_run1_df)
    .query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)')
)

df_nodupes.to_parquet('../data/pickles/numu_lowe_run1_nodupes%s.parquet'%tag)

## Run3

In [19]:
# Run3 BNB overlay: selection tree plus the matching weights file.
df_bnb = read_root('../data/bnb_overlay/mcc9_v29e_dl_run3b_bnb_nu_overlay_nocrtremerge_stripped.root','FinalVertexVariables')
df_bnb_cvweight = read_root('../data/bnb_overlay/weights_forCV_v40_bnb_nu_run3.root')

# Standard MC processing, then drop rows with nu_interaction_ccnc==0 and |nu_pdg|==12.
# Original note: "cut out nue ccqes". The cut does not look at the interaction
# type, so it presumably removes every CC nue event -- TODO confirm.
df_nodupes = (
    proc_df_mc(df_bnb,df_bnb_cvweight,good_run3_df)
    .query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)')
)

df_nodupes.to_parquet('../data/pickles/numu_run3_nodupes%s.parquet'%tag)

In [24]:
# Run3 low-energy BNB overlay: selection tree plus the matching weights file.
df_bnb = read_root('../data/bnb_overlay/mcc9_v29e_run3b_bnb_nu_overlay_LowE.root','dlana/FinalVertexVariables')
df_bnb_cvweight = read_root('../data/bnb_overlay/weights_forCV_v40_bnb_nu_lowE_run3.root')

# Standard MC processing, then drop rows with nu_interaction_ccnc==0 and |nu_pdg|==12.
# Original note: "cut out nue ccqes". The cut does not look at the interaction
# type, so it presumably removes every CC nue event -- TODO confirm.
df_nodupes = (
    proc_df_mc(df_bnb,df_bnb_cvweight,good_run3_df)
    .query('not (nu_interaction_ccnc==0 and abs(nu_pdg)==12)')
)

df_nodupes.to_parquet('../data/pickles/numu_lowe_run3_nodupes%s.parquet'%tag)

# DIRT

In [12]:
# Dirt sample (run1): selection tree plus the matching weights file.
df_dirt = read_root('../data/dirt/FVV-Prime-dirt-Mar3-WC-1M1P.root','FinalVertexVariables')
df_dirt_cvweight = read_root('../data/dirt/weights_forCV_v40_dirt_nu_run1.root')

# Same good-run / weights join and precuts as proc_df_mc. The steps are kept
# inline because the BDT scores used for de-duplication are computed in this
# cell before the sort.
df_full = df_dirt.join(good_run1_df.set_index('run'),on='run')
df_full = df_full.join(df_dirt_cvweight.set_index(RSE)[['nu_interaction_mode','nu_interaction_type','xsec_corr_weight','spline_weight','nu_interaction_ccnc','nu_pdg']],on=RSE)
df_full_goodruns = df_full.query('good==1')
df_full_goodruns_precuts = df_full_goodruns.query('PassPMTPrecut==1 and PassSimpleCuts==1')

# Load BDT weights for 1mu1p background differentiation.
# NOTE(review): pickle.load can execute arbitrary code -- only use a trusted file.
# NOTE(review): absolute home-directory path; this cell will not run on another machine.
with open('/home/dcianci/Physics/1e1p/1mu1pSelection/bdtweights_1mu1p_WC_apr1.pickle','rb') as handle:
    cosmicBDT,nubkgBDT = pickle.load(handle)

# Input variables passed to both BDTs, in this order.
myvars = ['Eta','ChargeNearTrunk','PT_1m1p','PhiT_1m1p','AlphaT_1m1p','Sph_1m1p','Q0_1m1p','Q2_1m1p','Q3_1m1p','Lepton_ThetaReco','Lepton_PhiReco','Proton_ThetaReco','Proton_PhiReco','PTRat_1m1p','Lepton_TrackLength','Thetas','Phis','Proton_TrackLength','OpenAng','PzEnu_1m1p']
s_nubdtname = 'BDTscore_1mu1p_nu'
s_cosbdtname = 'BDTscore_1mu1p_cosmic'

# Build the feature matrix once and feed the same rows to both classifiers.
bdt_inputs = df_full_goodruns_precuts[myvars].values.tolist()
df_full_goodruns_precuts.insert(0,s_nubdtname,nubkgBDT.predict(bdt_inputs,output_margin=True))
df_full_goodruns_precuts.insert(0,s_cosbdtname,cosmicBDT.predict(bdt_inputs,output_margin=True))

# Keep one row per (run, subrun, event): the one with the highest cosmic-BDT score.
df_nodupes = df_full_goodruns_precuts.sort_values(s_cosbdtname,ascending=False).drop_duplicates(RSE).sort_index()
df_nodupes.to_parquet('../data/pickles/dirt_run1_nodupes%s.parquet'%tag)

# Release the large intermediates; kernel memory persists across cells.
del df_full,df_full_goodruns,df_full_goodruns_precuts,bdt_inputs

7557 1470


# MC NUE OVERLAY

In [21]:
# Run1 intrinsic-nue overlay: selection output plus the matching weights file.
df_nue = read_root('../data/nue_intrinsic_overlay/mcc9_v28_wctagger_nueintrinsics_stripped.root')
df_nue_cvweights = read_root('../data/nue_intrinsic_overlay/weights_forCV_v40_intrinsic_nue_run1.root')

# Standard MC processing; no flavour cut is applied to this sample.
df_nodupes = proc_df_mc(df_dlana=df_nue,df_wgts=df_nue_cvweights,df_goodrun=good_run1_df)

df_nodupes.to_parquet('../data/pickles/nue_run1_nodupes%s.parquet'%tag)

In [22]:
# Run3 intrinsic-nue overlay: selection output plus the matching weights file.
df_nue = read_root('../data/nue_intrinsic_overlay/mcc9_v29e_run3b_bnb_intrinsic_nue_overlay_nocrtremerge_stripped.root')
df_nue_cvweights = read_root('../data/nue_intrinsic_overlay/weights_forCV_v40_intrinsic_nue_run3.root')

# Standard MC processing; no flavour cut is applied to this sample.
df_nodupes = proc_df_mc(df_dlana=df_nue,df_wgts=df_nue_cvweights,df_goodrun=good_run3_df)

df_nodupes.to_parquet('../data/pickles/nue_run3_nodupes%s.parquet'%tag)

# EXT

In [27]:
# Run1 EXT sample.
df_ext = read_root('../data/ext/mcc9_v28_wctagger_extbnbFULL_stripped.root')

# Reuse the shared pipeline (good-run join, precuts, keep the best
# BDTscore_1mu1p_cosmic row per event) instead of repeating it inline.
# '' is proc_df_mc's sentinel for "no weights frame": nothing extra is joined
# and no mc_label column is added.
df_nodupes = proc_df_mc(df_ext,'',good_run1_df)

df_nodupes.to_parquet('../data/pickles/ext_run1_nodupes%s.parquet'%tag)

# Release the raw frame; kernel memory persists across cells.
del df_ext

In [25]:
# Run3 EXT sample.
df_ext = read_root('../data/ext/mcc9_v29e_dl_run3_G1_extbnb_stripped.root')

# Reuse the shared pipeline (good-run join, precuts, keep the best
# BDTscore_1mu1p_cosmic row per event) instead of repeating it inline.
# '' is proc_df_mc's sentinel for "no weights frame": nothing extra is joined
# and no mc_label column is added.
df_nodupes = proc_df_mc(df_ext,'',good_run3_df)

df_nodupes.to_parquet('../data/pickles/ext_run3_nodupes%s.parquet'%tag)

# Release the raw frame; kernel memory persists across cells.
del df_ext

# Data

In [10]:
# Run1 beam data: attach the good-run flag, then the beam-quality record
# matched on (run, subrun, event).
df_data = (
    read_root('../data/bnb/mcc9_v28_wctagger_5e19.root','dlana/FinalVertexVariables')
    .join(good_run1_df.set_index('run'),on='run')
    .join(beamq_df.set_index(RSE),on=RSE)
)

# Keep good runs whose beam-quality 'result' is 1, then apply both precuts.
df_data_goodruns = df_data.query("good==1 and result==1")
df_data_goodruns_precuts = df_data_goodruns.query("PassPMTPrecut==1 and PassSimpleCuts==1")

# One row per event: the candidate with the highest cosmic-BDT score.
df_nodupes = (
    df_data_goodruns_precuts
    .sort_values('BDTscore_1mu1p_cosmic',ascending=False)
    .drop_duplicates(RSE)
    .sort_index()
)

df_nodupes.to_parquet('../data/pickles/data_run1_nodupes%s.parquet'%tag)

# Release the large intermediates; kernel memory persists across cells.
del df_data,df_data_goodruns,df_data_goodruns_precuts

In [7]:
# Run3 beam data.
df_data = read_root('../data/bnb/mcc9_v29e_dl_run3_G1_bnb_dlfilter_1m1p_fvv.root','dlana/FinalVertexVariables')

# TODO: no beam-quality requirement (result==1) is applied to this sample.
# The join against beamq_df was disabled here; beamq_df is read from the
# 'bnb5e19' beam-quality file.

# Reuse the shared pipeline (good-run join, precuts, keep the best
# BDTscore_1mu1p_cosmic row per event) instead of repeating it inline.
# '' is proc_df_mc's sentinel for "no weights frame".
df_nodupes = proc_df_mc(df_data,'',good_run3_df)

df_nodupes.to_parquet('../data/pickles/data_run3_filter_nodupes%s.parquet'%tag)

# Release the raw frame; kernel memory persists across cells.
del df_data

# Get POT for mc