In [None]:
import os, sys
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd

import ROOT
import root_numpy as rn
from larocv import larocv

# Column sets used as keys throughout the notebook:
#   rse   - identifies one event; used for the event-level joins and groupby calls
#   rsev  - rse plus 'vtxid' (not referenced in the cells visible here)
#   rserv - rse plus 'roid' and 'vtxid'; index used to align the per-vertex trees
rse    = ['run','subrun','event']
rsev   = ['run','subrun','event','vtxid']
rserv  = ['run','subrun','event','roid','vtxid']

In [None]:
# Vertex data frame, one entry per sample name
dfs  = {}

# Event data frame
edfs = {}
mdfs = {}

sample_name0 = "nue"
sample_file0 = "../comb_ana_nue.root"

sample_name1 = "cosmic"
sample_file1 = "../comb_ana_cosmic_no_stopmu.root"

# Extra file that is joined onto the nue sample only (EventCosmicPixelTree)
nue_cosmic_pixel_file = "nue_cosmo_ana.root"

# Vertex wise trees. Each one is indexed by rserv and the frames are placed
# side by side. Every tree is listed exactly once: listing AngleAnalysis twice
# only produced duplicated columns that had to be thrown away again later.
vertex_tree_names = ['VertexTree','AngleAnalysis','ShapeAnalysis',
                     'GapAnalysis','MatchAnalysis','dQdSAnalysis']


def read_tree(file_name, treename):
    """Read the tree ``treename`` of ``file_name`` with root_numpy into a DataFrame."""
    return pd.DataFrame(rn.root2array(file_name,treename=treename))


def drop_y(df):
    """Drop, in place, every column of ``df`` whose name ends with '_y'.

    NOTE(review): this also removes any input column that genuinely ends in
    '_y'; confirm no tree has such a branch.
    """
    to_drop = [x for x in df if x.endswith('_y')]
    df.drop(to_drop, axis=1, inplace=True)


def join_on_event(left, right):
    """Outer-join ``right`` onto ``left`` using the rse columns as key.

    Columns present in both frames keep the ``left`` version: the ``right``
    copies are suffixed with '_y' by the join and then dropped.
    Returns a new frame with a fresh integer index.
    """
    joined = left.set_index(rse).join(right.set_index(rse),how='outer',lsuffix='',rsuffix='_y').reset_index()
    drop_y(joined)
    return joined


for name,file_ in [(sample_name0,sample_file0),
                   (sample_name1,sample_file1)]:

    INPUT_FILE  = file_

    #
    # Vertex wise Trees, combined column-wise on rserv
    #
    comb_df = pd.concat([read_tree(INPUT_FILE,tree).set_index(rserv)
                         for tree in vertex_tree_names],axis=1)
    comb_df = comb_df.reset_index()

    #
    # Event wise trees
    #
    comb_df = join_on_event(comb_df, read_tree(INPUT_FILE,"EventVertexTree"))

    if name == "nue":
        # These trees are only read for the nue sample
        for tree in ["NuFilterTree","MCTree"]:
            comb_df = join_on_event(comb_df, read_tree(INPUT_FILE,tree))

        comb_df = join_on_event(comb_df, read_tree(nue_cosmic_pixel_file,'EventCosmicPixelTree'))

    #
    # Store vertex wise data frame
    #
    # NOTE(review): the index is already a fresh integer range here, so this
    # reset_index only adds an 'index' column. Kept so the stored columns stay
    # the same as before.
    comb_df = comb_df.reset_index()
    comb_df = comb_df.loc[:,~comb_df.columns.duplicated()]

    # Running vertex counter inside each event: 0,1,2,... in row order
    comb_df['cvtxid'] = comb_df.groupby(['run','subrun','event']).cumcount()

    dfs[name] = comb_df.copy()

In [None]:
def track_shower_assumption(df):
    """Decide which of the two particles is the track and which is the shower.

    Adds four columns to ``df`` in place and returns None:

    * ``trkid``        - 0 if ``par1_type == 1``, otherwise 1
    * ``shrid``        - 1 if ``par2_type == 2``, otherwise 0
    * ``trk_frac_avg`` - ``par1_frac`` if ``par1_type == 1``, otherwise ``par2_frac``
    * ``shr_frac_avg`` - ``par2_frac`` if ``par2_type == 2``, otherwise ``par1_frac``

    The columns are computed on whole arrays rather than row by row, which
    also makes the function work on a frame with zero rows.
    """
    par1_is_track  = (df['par1_type'] == 1).values
    par2_is_shower = (df['par2_type'] == 2).values

    df['trkid'] = np.where(par1_is_track, 0, 1)
    df['shrid'] = np.where(par2_is_shower, 1, 0)

    par1_frac = df['par1_frac'].values
    par2_frac = df['par2_frac'].values

    df['trk_frac_avg'] = np.where(par1_is_track, par1_frac, par2_frac)
    df['shr_frac_avg'] = np.where(par2_is_shower, par2_frac, par1_frac)

ts_mdf_m = {}

for name, comb_df in dfs.copy().iteritems():
    print
    print "@ sample",name
    print
    
    ts_mdf = comb_df.copy()

    print "Asking nue assumption"
    print "Asking npar==2"
    print "Asking in_fiducial==1"
    print "Asking pathexists2==1"

    ts_mdf = ts_mdf.query("npar==2")
    track_shower_assumption(ts_mdf)
    ts_mdf = ts_mdf.query("par1_type != par2_type")
    ts_mdf = ts_mdf.query("in_fiducial==1")
    ts_mdf = ts_mdf.query("pathexists2==1")

    
    #
    # SSNet Fraction
    #
    ts_mdf['trk_frac'] = ts_mdf.apply(lambda x : x['trk_frac_avg'] / x['nplanes_v'][x['trkid']],axis=1) 
    ts_mdf['shr_frac'] = ts_mdf.apply(lambda x : x['shr_frac_avg'] / x['nplanes_v'][x['shrid']],axis=1) 
    
    #
    # PCA
    #
    
    ts_mdf['cosangle3d']=ts_mdf.apply(lambda x : larocv.CosOpeningAngle(x['par_trunk_pca_theta_estimate_v'][0],
                                                                        x['par_trunk_pca_phi_estimate_v'][0],
                                                                        x['par_trunk_pca_theta_estimate_v'][1],
                                                                        x['par_trunk_pca_phi_estimate_v'][1]),axis=1)
    
    ts_mdf['angle3d'] = ts_mdf.apply(lambda x : np.arccos(x['cosangle3d']),axis=1)
    
    
    ts_mdf['shr_trunk_pca_theta_estimate'] = ts_mdf.apply(lambda x : x['par_trunk_pca_theta_estimate_v'][x['shrid']],axis=1) 
    ts_mdf['trk_trunk_pca_theta_estimate'] = ts_mdf.apply(lambda x : x['par_trunk_pca_theta_estimate_v'][x['trkid']],axis=1) 
    
    ts_mdf['shr_trunk_pca_cos_theta_estimate'] = ts_mdf.apply(lambda x : np.cos(x['par_trunk_pca_theta_estimate_v'][x['shrid']]),axis=1) 
    ts_mdf['trk_trunk_pca_cos_theta_estimate'] = ts_mdf.apply(lambda x : np.cos(x['par_trunk_pca_theta_estimate_v'][x['trkid']]),axis=1) 

    
    #
    # 3D
    #
    ts_mdf['shr_3d_length'] = ts_mdf.apply(lambda x : x['par_pca_end_len_v'][x['shrid']],axis=1)
    ts_mdf['trk_3d_length'] = ts_mdf.apply(lambda x : x['par_pca_end_len_v'][x['trkid']],axis=1)

    ts_mdf['shr_3d_QavgL'] = ts_mdf.apply(lambda x : x['qsum_v'][x['shrid']] / x['par_pca_end_len_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['trk_3d_QavgL'] = ts_mdf.apply(lambda x : x['qsum_v'][x['trkid']] / x['par_pca_end_len_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)

    #
    # Max deflection
    #
    ts_mdf['shr_triangle_d_max'] = ts_mdf.apply(lambda x : x['triangle_d_max_v'][x['shrid']],axis=1)
    ts_mdf['trk_triangle_d_max'] = ts_mdf.apply(lambda x : x['triangle_d_max_v'][x['trkid']],axis=1)
    
    #
    # Mean pixel dist from 2D PCA
    #
    ts_mdf['shr_mean_pixel_dist'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_v'][x['shrid']]/x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_mean_pixel_dist_max'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_mean_pixel_dist_min'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_mean_pixel_dist_ratio'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_min_v'][x['shrid']] / x['mean_pixel_dist_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_mean_pixel_dist'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_v'][x['trkid']]/x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_mean_pixel_dist_max'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_mean_pixel_dist_min'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_min_v'][x['trkid']],axis=1)
    ts_mdf['trk_mean_pixel_dist_ratio'] = ts_mdf.apply(lambda x : x['mean_pixel_dist_min_v'][x['trkid']] / x['mean_pixel_dist_max_v'][x['trkid']],axis=1)     

    #
    # Sigma pixel dist from 2D PCA
    #
    ts_mdf['shr_sigma_pixel_dist']       = ts_mdf.apply(lambda x : x['sigma_pixel_dist_v'][x['shrid']]/x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_sigma_pixel_dist_max']   = ts_mdf.apply(lambda x : x['sigma_pixel_dist_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_sigma_pixel_dist_min']   = ts_mdf.apply(lambda x : x['sigma_pixel_dist_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_sigma_pixel_dist_ratio'] = ts_mdf.apply(lambda x : x['sigma_pixel_dist_min_v'][x['shrid']] / x['sigma_pixel_dist_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_sigma_pixel_dist']       = ts_mdf.apply(lambda x : x['sigma_pixel_dist_v'][x['trkid']]/x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_sigma_pixel_dist_max']   = ts_mdf.apply(lambda x : x['sigma_pixel_dist_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_sigma_pixel_dist_min']   = ts_mdf.apply(lambda x : x['sigma_pixel_dist_min_v'][x['trkid']],axis=1)
    ts_mdf['trk_sigma_pixel_dist_ratio'] = ts_mdf.apply(lambda x : x['sigma_pixel_dist_min_v'][x['trkid']] / x['sigma_pixel_dist_max_v'][x['trkid']],axis=1)    

    #
    # Ratio of # num pixels
    #
    ts_mdf['shr_par_pixel_ratio'] = ts_mdf.apply(lambda x : x['par_pixel_ratio_v'][x['shrid']],axis=1)
    ts_mdf['trk_par_pixel_ratio'] = ts_mdf.apply(lambda x : x['par_pixel_ratio_v'][x['trkid']],axis=1) 

    #
    # 2D angle difference @ vertex
    #
    ts_mdf['anglediff0'] = ts_mdf['anglediff'].values 

    #
    # 2D length
    #
    ts_mdf['shr_avg_length']   = ts_mdf.apply(lambda x : x['length_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_length_min']   = ts_mdf.apply(lambda x : x['length_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_length_max']   = ts_mdf.apply(lambda x : x['length_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_length_ratio'] = ts_mdf.apply(lambda x : x['length_min_v'][x['shrid']] / x['length_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_avg_length']   = ts_mdf.apply(lambda x : x['length_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_length_min']   = ts_mdf.apply(lambda x : x['length_min_v'][x['trkid']],axis=1)
    ts_mdf['trk_length_max']   = ts_mdf.apply(lambda x : x['length_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_length_ratio'] = ts_mdf.apply(lambda x : x['length_min_v'][x['trkid']] / x['length_max_v'][x['trkid']],axis=1)
    
    #
    # 2D width
    #
    ts_mdf['shr_avg_width']   = ts_mdf.apply(lambda x : x['width_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_width_min']   = ts_mdf.apply(lambda x : x['width_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_width_max']   = ts_mdf.apply(lambda x : x['width_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_width_ratio'] = ts_mdf.apply(lambda x : x['width_min_v'][x['shrid']] / x['width_max_v'][x['shrid']],axis=1)

    ts_mdf['trk_avg_width']   = ts_mdf.apply(lambda x : x['width_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_width_max']   = ts_mdf.apply(lambda x : x['width_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_width_min']   = ts_mdf.apply(lambda x : x['width_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_width_ratio'] = ts_mdf.apply(lambda x : x['width_min_v'][x['trkid']] / x['width_max_v'][x['trkid']],axis=1)

    #
    # 2D perimeter
    #
    ts_mdf['shr_avg_perimeter'] = ts_mdf.apply(lambda x : x['perimeter_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_perimeter_min'] = ts_mdf.apply(lambda x : x['perimeter_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_perimeter_max'] = ts_mdf.apply(lambda x : x['perimeter_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_perimeter_ratio'] = ts_mdf.apply(lambda x : x['perimeter_min_v'][x['shrid']] / x['perimeter_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_avg_perimeter'] = ts_mdf.apply(lambda x : x['perimeter_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_perimeter_min'] = ts_mdf.apply(lambda x : x['perimeter_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_perimeter_max'] = ts_mdf.apply(lambda x : x['perimeter_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_perimeter_ratio'] = ts_mdf.apply(lambda x : x['perimeter_min_v'][x['trkid']] / x['perimeter_max_v'][x['trkid']],axis=1)

    #
    # 2D area
    #
    ts_mdf['shr_avg_area'] = ts_mdf.apply(lambda x : x['area_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_area_min'] = ts_mdf.apply(lambda x : x['area_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_area_max'] = ts_mdf.apply(lambda x : x['area_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_area_ratio'] = ts_mdf.apply(lambda x : x['area_min_v'][x['shrid']] / x['area_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_avg_area'] = ts_mdf.apply(lambda x : x['area_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_area_min'] = ts_mdf.apply(lambda x : x['area_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_area_max'] = ts_mdf.apply(lambda x : x['area_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_area_ratio'] = ts_mdf.apply(lambda x : x['area_min_v'][x['trkid']] / x['area_max_v'][x['trkid']],axis=1)

    #
    # N pixel
    #
    ts_mdf['shr_avg_npixel'] = ts_mdf.apply(lambda x : x['npixel_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_npixel_min'] = ts_mdf.apply(lambda x : x['npixel_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_npixel_max'] = ts_mdf.apply(lambda x : x['npixel_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_npixel_ratio'] = ts_mdf.apply(lambda x : x['npixel_min_v'][x['shrid']] / x['npixel_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_avg_npixel'] = ts_mdf.apply(lambda x : x['npixel_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_npixel_min'] = ts_mdf.apply(lambda x : x['npixel_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_npixel_max'] = ts_mdf.apply(lambda x : x['npixel_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_npixel_ratio'] = ts_mdf.apply(lambda x : x['npixel_min_v'][x['trkid']] / x['npixel_max_v'][x['trkid']],axis=1)

    #
    # Q sum
    #
    ts_mdf['shr_avg_qsum']   = ts_mdf.apply(lambda x : x['qsum_v'][x['shrid']] / x['nplanes_v'][x['shrid']],axis=1)
    ts_mdf['shr_qsum_min']   = ts_mdf.apply(lambda x : x['qsum_min_v'][x['shrid']],axis=1)
    ts_mdf['shr_qsum_max']   = ts_mdf.apply(lambda x : x['qsum_max_v'][x['shrid']],axis=1)
    ts_mdf['shr_qsum_ratio'] = ts_mdf.apply(lambda x : x['qsum_min_v'][x['shrid']] / x['qsum_max_v'][x['shrid']],axis=1)
    
    ts_mdf['trk_avg_qsum']   = ts_mdf.apply(lambda x : x['qsum_v'][x['trkid']] / x['nplanes_v'][x['trkid']],axis=1)
    ts_mdf['trk_qsum_min']   = ts_mdf.apply(lambda x : x['qsum_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_qsum_max']   = ts_mdf.apply(lambda x : x['qsum_max_v'][x['trkid']],axis=1)
    ts_mdf['trk_qsum_ratio'] = ts_mdf.apply(lambda x : x['qsum_min_v'][x['trkid']] / x['qsum_max_v'][x['trkid']],axis=1)

    #
    #
    #
    ts_mdf_m[name] = ts_mdf.copy()


In [None]:
name='nue'
print "~~~~~~~~~ Raw Output ~~~~~~~~~"
all_df   = dfs[name]
event_df = dfs[name].groupby(rse).nth(0) 

scedr=5
print "Loaded events...........",event_df.index.size
print "...good cROI counter....",event_df.query("good_croi_ctr>0").index.size
print "...reco.................",event_df.query("num_vertex>0").index.size
print
if name=='nue':
    print "1L1P....................",event_df.query("selected1L1P==1").index.size
    print "...good cROI counter....",event_df.query("good_croi_ctr>0 & selected1L1P==1").index.size
    print "...reco.................",event_df.query("good_croi_ctr>0 & selected1L1P==1 & num_vertex>0").dropna().index.size
    print
    print "1L1P E in [200,800] MeV.",event_df.query("selected1L1P==1 & energyInit>=200 & energyInit<=800").index.size
    print "...good cROI counter....",event_df.query("selected1L1P==1 & good_croi_ctr>0 & energyInit>=200 & energyInit<=800").index.size
    print "...reco.................",event_df.query("selected1L1P==1 & good_croi_ctr>0 & energyInit>=200 & energyInit<=800 & num_vertex>0").dropna().index.size
    print
    print "===> GOOD vertices scedr<{} <===".format(scedr)
    SS="scedr<@scedr"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print
    print ">>>good cROI<<<"
    SS="scedr<@scedr & good_croi_ctr>0"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print 
    print ">>>good cROI + 1L1P<<<"
    SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print
    print ">>>good cROI + 1L1P + E<<<"
    SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1 & energyInit>=200 & energyInit<=800"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print

if name=='cosmic':
    print "===> Total Vertices <===".format(scedr)
    print "...total................",all_df.query("num_vertex>0").index.size
    print "...events...............",len(all_df.query("num_vertex>0").groupby(rse))
    print

In [None]:
name='nue'
print "~~~~~~~~~ Nue Assumption Output ~~~~~~~~~"
all_df   = ts_mdf_m[name]
event_df = dfs[name].groupby(rse).nth(0) 

scedr=5
print "Loaded events...........",event_df.index.size
print "...good cROI counter....",event_df.query("good_croi_ctr>0").index.size
print "...reco.................",event_df.query("num_vertex>0").index.size
print
if name == 'nue':
    print "1L1P....................",event_df.query("selected1L1P==1").index.size
    print "...good cROI counter....",event_df.query("good_croi_ctr>0 & selected1L1P==1").index.size
    print "...reco.................",event_df.query("good_croi_ctr>0 & selected1L1P==1 & num_vertex>0").dropna().index.size
    print
    print "1L1P E in [200,800] MeV.",event_df.query("selected1L1P==1 & energyInit>=200 & energyInit<=800").index.size
    print "...good cROI counter....",event_df.query("selected1L1P==1 & good_croi_ctr>0 & energyInit>=200 & energyInit<=800").index.size
    print "...reco.................",event_df.query("selected1L1P==1 & good_croi_ctr>0 & energyInit>=200 & energyInit<=800 & num_vertex>0").dropna().index.size
    print
    print "===> GOOD vertices scedr<{} <===".format(scedr)
    SS="scedr<@scedr"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print
    print ">>>good cROI<<<"
    SS="scedr<@scedr & good_croi_ctr>0"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print 
    print ">>>good cROI + 1L1P<<<"
    SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print
    print ">>>good cROI + 1L1P + E<<<"
    SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1 & energyInit>=200 & energyInit<=800"
    print "...total................",all_df.query("num_vertex>0").query(SS).index.size
    print "...events...............",len(all_df.query("num_vertex>0").query(SS).groupby(rse))
    print
if name=='cosmic':
    print "===> Total Vertices <===".format(scedr)
    print "...total................",all_df.query("num_vertex>0").index.size
    print "...events...............",len(all_df.query("num_vertex>0").groupby(rse))
    print

In [None]:
import collections

# Variables entering the likelihood.
# Each row is (column name, (xlo, xhi, dx), axis label); the histogram of that
# column is later built on np.arange(xlo, xhi+dx, dx).
# Rows that are commented out are disabled variables: uncomment a row to add it
# to pdf_m. The order of the rows is the order of the entries in pdf_m.
pdf_specs = [
    # Max 2D deflection
    # ('shr_triangle_d_max', (0.0, 40.0, 2), "Shower - Max 2D Deflection [pix]"),
    # ('trk_triangle_d_max', (0.0, 40.0, 2), "Track - Max 2D Deflection [pix]"),

    # Mean pixel distance from 2D PCA
    ('shr_mean_pixel_dist', (0.0, 10.0, 0.2), "Shower - Mean Distance from 2D PCA [pix]"),
    # ('trk_mean_pixel_dist', (0.0, 10.0, 0.2), "Track - Mean Distance from 2D PCA [pix]"),
    # ('shr_mean_pixel_dist_max', (0.0, 10.0, 0.2), "Shower - Max Mean Distance from 2D PCA [pix]"),
    # ('trk_mean_pixel_dist_max', (0.0, 10.0, 0.2), "Track - Max Mean Distance from 2D PCA [pix]"),

    # Sigma pixel distance from 2D PCA
    ('shr_sigma_pixel_dist', (0.0, 10.0, 0.2), "Shower - Sigma Distance from 2D PCA [pix]"),
    # ('trk_sigma_pixel_dist', (0.0, 10.0, 0.2), "Track - Sigma Distance from 2D PCA [pix]"),
    # ('shr_sigma_pixel_dist_max', (0.0, 10.0, 0.2), "Shower - Max Sigma Distance from 2D PCA [pix]"),
    # ('trk_sigma_pixel_dist_max', (0.0, 10.0, 0.2), "Track - Max Sigma Distance from 2D PCA [pix]"),

    # Cluster size ratio
    ('shr_par_pixel_ratio', (0.0, 1.0, 0.025), "Shower - Cluster Size Ratio"),
    ('trk_par_pixel_ratio', (0.0, 1.0, 0.025), "Track - Cluster Size Ratio"),

    # Angles
    ('cosangle3d', (-1.0, 1.0, 0.05), "Cos 3D Opening Angle"),
    # ('angle3d', (0, 3.14159, 3.14159/40.0), "3D Opening Angle"),
    ('anglediff0', (0, 180, 5), "2D Angle Difference [deg]"),
    ('shr_trunk_pca_cos_theta_estimate', (-1.0, 1.0, 0.05), "Shower - Cos 3D Beam Angle"),
    ('trk_trunk_pca_cos_theta_estimate', (-1.0, 1.0, 0.05), "Track - Cos 3D Beam Angle"),
    # ('trk_trunk_pca_theta_estimate', (0, 3.14159, 3.14159/40.0), "Track - 3D Beam Angle"),
    # ('shr_trunk_pca_theta_estimate', (0, 3.14159, 3.14159/40.0), "Shower - 3D Beam Angle"),

    # Length
    ('shr_avg_length', (0, 500, 10), "Shower - Average 2D Length [pix]"),
    ('trk_avg_length', (0, 500, 10), "Track - Average 2D Length [pix]"),
    # ('shr_length_min', (0, 300, 5), "Shower - Min 2D Length [pix]"),
    # ('trk_length_min', (0, 300, 5), "Track - Min 2D Length [pix]"),
    # ('shr_length_max', (0, 300, 5), "Shower - Max 2D Length [pix]"),
    # ('trk_length_max', (0, 300, 5), "Track - Max 2D Length [pix]"),

    # Area
    ('shr_avg_area', (0, 1000, 20), "Shower - Average 2D Area [pix^2]"),
    ('trk_avg_area', (0, 1000, 20), "Track - Average 2D Area [pix^2]"),
    # ('shr_area_min', (0, 600, 10), "Shower - Min 2D Area [pix^2]"),
    # ('trk_area_min', (0, 600, 10), "Track - Min 2D Area [pix^2]"),
    # ('shr_area_max', (0, 600, 10), "Shower - Max 2D Area [pix^2]"),
    # ('trk_area_max', (0, 600, 10), "Track - Max 2D Area [pix^2]"),

    # 3D length
    ('shr_3d_length', (0, 100, 2), "Shower - 3D Length [cm]"),
    ('trk_3d_length', (0, 100, 2), "Track - 3D Length [cm]"),

    # Width
    ('shr_avg_width', (0, 50, 1), "Shower - Average 2D Width [px]"),
    ('trk_avg_width', (0, 50, 1), "Track - Average 2D Width [px]"),
    # ('shr_width_min', (0, 50, 2), "Shower - Min 2D Width [px]"),
    # ('trk_width_min', (0, 50, 2), "Track - Min 2D Width [px]"),
    # ('shr_width_max', (0, 50, 2), "Shower - Max 2D Width [px]"),
    # ('trk_width_max', (0, 50, 2), "Track - Max 2D Width [px]"),

    # npixel
    # ('shr_avg_npixel', (0, 1000, 20), "Shower - Average Num. Pixel"),
    # ('trk_avg_npixel', (0, 1000, 20), "Track - Average Num. Pixel"),
    # ('shr_npixel_min', (0, 600, 10), "Shower - Min Num. Pixel"),
    # ('trk_npixel_min', (0, 600, 10), "Track - Min Num. Pixel"),
    # ('shr_npixel_max', (0, 600, 10), "Shower - Max Num. Pixel"),
    # ('trk_npixel_max', (0, 600, 10), "Track - Max Num. Pixel"),

    # Perimeter
    # ('shr_avg_perimeter', (0, 300, 5), "Shower - Average 2D Perimeter [pix]"),
    # ('trk_avg_perimeter', (0, 300, 5), "Track - Average 2D Perimeter [pix]"),
    # ('shr_perimeter_min', (0, 300, 5), "Shower - Min 2D Perimeter [pix]"),
    # ('trk_perimeter_min', (0, 300, 5), "Track - Min 2D Perimeter [pix]"),
    # ('shr_perimeter_max', (0, 300, 5), "Shower - Max 2D Perimeter [pix]"),
    # ('trk_perimeter_max', (0, 300, 5), "Track - Max 2D Perimeter [pix]"),

    # Qaverage/L
    ('shr_3d_QavgL', (0, 5000, 50), "Shower - Average Charge / 3D Length [pix/cm]"),
    ('trk_3d_QavgL', (0, 5000, 50), "Track - Average Charge / 3D Length [pix/cm]"),

    # dQ/dX
    ('dqds_ratio_01', (0, 1, 0.025), "dQ/dX Ratio"),
    ('dqds_diff_01', (0, 500, 10), "dQ/dX Difference [pix/cm]"),

    # SSNet fractions
    ('trk_frac', (0.5, 1, 0.01), "Track Frac"),
    ('shr_frac', (0.5, 1, 0.01), "Shower Frac"),

    # Min/max ratios and charge sums
    # ('shr_length_ratio', (0, 1.0, 0.025), "shr_length_ratio"),
    # ('trk_length_ratio', (0, 1.0, 0.025), "trk_length_ratio"),
    # ('shr_width_ratio', (0, 1.0, 0.025), "shr_width_ratio"),
    # ('trk_width_ratio', (0, 1.0, 0.025), "trk_width_ratio"),
    # ('shr_area_ratio', (0, 1.0, 0.025), "shr_area_ratio"),
    # ('trk_area_ratio', (0, 1.0, 0.025), "trk_area_ratio"),
    # ('shr_qsum_max', (0, 100000, 1000), "shr_qsum_max"),
    # ('trk_qsum_max', (0, 100000, 1000), "trk_qsum_max"),
    # ('shr_perimeter_ratio', (0, 1.0, 0.025), "shr_perimeter_ratio"),
    # ('trk_perimeter_ratio', (0, 1.0, 0.025), "trk_perimeter_ratio"),
    # ('shr_npixel_ratio', (0, 1.0, 0.025), "shr_npixel_ratio"),
    # ('trk_npixel_ratio', (0, 1.0, 0.025), "trk_npixel_ratio"),
]

# column name -> ((xlo, xhi, dx), axis label), in the order listed above
pdf_m = collections.OrderedDict((key,(binning,label)) for key,binning,label in pdf_specs)

# Filled later with the normalised signal / background spectrum of each variable
sig_spectrum_m = {}
bkg_spectrum_m = {}


In [None]:
# Set to False to fill the spectra without drawing them
DRAW=True

# Threshold on the scedr column; SS reads it through @scedr
scedr=5
SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1"

# The two samples are the same for every variable, so select them once
# instead of once per histogram
ts_mdf0 = ts_mdf_m['nue'].query(SS).copy()
ts_mdf1 = ts_mdf_m['cosmic'].copy()


def _normalized_spectrum(values, xlo, xhi, dx):
    """Histogram ``values`` on np.arange(xlo, xhi+dx, dx) and normalise to unit sum.

    Values outside [xlo, xhi] are dropped first (NaN fails both comparisons
    and is dropped as well). Empty bins are given one count before
    normalising, so no bin of the returned spectrum is zero.

    Returns (edges, centers, fractions, errors) where ``fractions`` is
    counts / sum(counts) and ``errors`` is sqrt(counts) / sum(counts).

    NOTE(review): with a fractional ``dx`` np.arange can return one more edge
    than expected (see the numpy.arange documentation). The binning is left
    exactly as it was.
    """
    kept = values[values >= xlo]
    kept = kept[kept <= xhi]

    counts, edges = np.histogram(kept,bins=np.arange(xlo,xhi+dx,dx))
    counts = np.where(counts==0,1,counts)
    total  = float(counts.sum())

    centers = (edges + (edges[1] - edges[0]) / 2.0)[:-1]
    return edges, centers, counts / total, np.sqrt(counts) / total


for key,item in pdf_m.items():
    xlo,xhi,dx = item[0]
    # axis label; deliberately not called `name`, which other cells use for
    # the sample name
    xlabel     = item[1]

    bins, centers, bkg_norm, bkg_err_norm = _normalized_spectrum(ts_mdf1[key].values,xlo,xhi,dx)
    _,    _,       sig_norm, sig_err_norm = _normalized_spectrum(ts_mdf0[key].values,xlo,xhi,dx)

    bkg_spectrum_m[key] = (centers,bkg_norm)
    sig_spectrum_m[key] = (centers,sig_norm)

    if DRAW:
        fig,ax=plt.subplots(figsize=(10,6))

        # Draw the already-binned spectra: one entry per bin, placed at the
        # bin's left edge and weighted with the bin content
        data = bins[:-1]

        ax.hist(data,bins=bins,weights=bkg_norm,histtype='stepfilled',color='red',lw=1,alpha=0.1)
        ax.hist(data,bins=bins,weights=bkg_norm,histtype='step',color='red',lw=2,label='Background')

        ax.hist(data,bins=bins,weights=sig_norm,histtype='stepfilled',color='blue',lw=1,alpha=0.1)
        ax.hist(data,bins=bins,weights=sig_norm,histtype='step',color='blue',lw=2,label='Signal')

        ax.errorbar(centers,bkg_norm,yerr=bkg_err_norm,fmt='o',color='red',markersize=0,lw=2)
        ax.errorbar(centers,sig_norm,yerr=sig_err_norm,fmt='o',color='blue',markersize=0,lw=2)

        ax.set_ylabel("Fraction of Vertices",fontweight='bold',fontsize=20)
        ax.set_xlabel(xlabel,fontweight='bold',fontsize=20)
        ax.set_xlim(xlo,xhi)
        ax.legend(loc='best')
        ax.grid()
        plt.show()
    

In [None]:
def nearest_id(spectrum,value):
    """Return the position of the entry of ``spectrum`` that is closest to ``value``.

    When several entries are equally close, the first one wins (np.argmin).
    """
    distance = np.abs(spectrum - value)
    return np.argmin(distance)

def nearest_id_v(spectrum_v,value_v):
    """Nearest-entry lookup for several spectra at once.

    ``spectrum_v`` is a sequence of spectra whose first element (``spectrum[0]``)
    is the array to search; ``value_v`` holds one value per spectrum. The two
    sequences are walked in parallel and, for every pair, the position of the
    entry of ``spectrum[0]`` closest to the value is recorded.

    Returns a numpy array with one position per (spectrum, value) pair.
    """
    positions = []
    for spectrum, value in zip(spectrum_v,value_v):
        grid = spectrum[0]
        positions.append(np.argmin(np.abs(grid - value)))
    return np.array(positions)

def LL(row):
    """Sum of log( S / (S + B) ) over all variables in ``sig_spectrum_m``.

    For every key of ``sig_spectrum_m`` the value ``row[key]`` is looked up in
    the signal and in the background spectrum, each stored as
    (centers, fractions): the fraction at the center closest to the value is
    taken as S resp. B.

    Signal and background are paired explicitly by key. Nothing then depends
    on the two dicts iterating in the same order, and a variable missing from
    ``bkg_spectrum_m`` raises KeyError instead of being silently paired with
    another variable's spectrum.

    Returns the summed log likelihood ratio as a float.
    """
    sig_res = []
    bkg_res = []
    for key in list(sig_spectrum_m.keys()):
        value = row[key]

        sig_centers, sig_frac = sig_spectrum_m[key]
        bkg_centers, bkg_frac = bkg_spectrum_m[key]

        sig_res.append(sig_frac[np.argmin(np.abs(sig_centers - value))])
        bkg_res.append(bkg_frac[np.argmin(np.abs(bkg_centers - value))])

    sig_res = np.array(sig_res)
    bkg_res = np.array(bkg_res)

    log_terms = np.log( sig_res / (sig_res + bkg_res) )
    return log_terms.sum()


In [None]:
ts_mdf_pass = {}
for name, comb_cut_df in ts_mdf_m.iteritems():
    comb_cut_df_copy = comb_cut_df.copy()
    comb_cut_df_copy['LL'] = comb_cut_df_copy.apply(LL,axis=1)

    print "Choosing vertex with max LL @name={}".format(name)
    passed_df = comb_cut_df_copy.copy()
    passed_df = passed_df.sort_values(["LL"],ascending=False).groupby(rse).head(1)
    ts_mdf_pass[name] = passed_df.copy()

In [None]:
fig,ax=plt.subplots(figsize=(10,6))

xlo=-50
xhi=0
dx=0.25
bins=np.arange(xlo,xhi+dx,dx)


def _draw_fraction_hist(ax, values, bins, color, label):
    """Draw ``values`` on ``ax`` as a unit-normalised histogram.

    Every entry carries the weight 1/len(values). The histogram is drawn twice:
    a faint filled version and a solid outline that carries ``label``.
    An empty ``values`` array draws an empty histogram instead of raising
    ZeroDivisionError.

    Returns what ``ax.hist`` returned for the filled version.
    """
    weights = np.ones(values.size) / max(values.size, 1)
    filled = ax.hist(values,bins=bins,weights=weights,color=color,histtype='stepfilled',alpha=0.1,lw=2)
    ax.hist(values,bins=bins,weights=weights,color=color,histtype='step',alpha=1.0,lw=2,label=label)
    return filled


data = ts_mdf_pass['cosmic']['LL'].values
k1_res = _draw_fraction_hist(ax,data,bins,'red','Background')

# Threshold on the scedr column; SS reads it through @scedr
scedr=5
SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1"
data = ts_mdf_pass['nue'].query(SS)['LL'].values
k0_res = _draw_fraction_hist(ax,data,bins,'blue','Signal')

ax.set_xlabel("Log Likelihood",fontweight='bold')
ax.set_ylabel("Fraction of Events",fontweight='bold')
ax.legend(loc='upper left')
plt.grid()
plt.show()

In [None]:
k0_data = k0_res[0]
k0_total = float(k0_data.sum())
print "sig LL total",k0_total

k1_data = k1_res[0]
k1_total = float(k1_data.sum())
print "bkg LL total",k1_total

centers = bins + (bins[1]-bins[0])/2.0
centers = centers[:-1]


k0_sum  = [float(k0_data[ix:].sum()) for ix in xrange(centers.size)]
k1_sum  = [float(k1_data[ix:].sum()) for ix in xrange(centers.size)]

k01_sum = [k0_sum[ix] + k1_sum[ix] for ix in xrange(centers.size)]

k0_eff  = [k0_sum[ix] / k0_total for ix in xrange(centers.size)]
k1_eff  = [k1_sum[ix] / k1_total for ix in xrange(centers.size)]


k0_sum  = np.array(k0_sum)
k1_sum  = np.array(k1_sum)
k01_sum = np.array(k01_sum)

k0_eff = np.array(k0_eff)
k1_eff = np.array(k1_eff)

In [None]:
#Efficiency

fig,ax=plt.subplots(figsize=(10,6))


data = k1_eff * float(ts_mdf_m['cosmic'].index.size) / float(dfs['cosmic'].index.size)
k1_rej = 1.0 - data
ax.plot(centers,k1_rej,'-',color='red',lw=3,label='Background')
ax.grid()

scedr=5
SS="scedr<@scedr & good_croi_ctr>0 & selected1L1P==1 & energyInit<800 & energyInit>200"
print float(ts_mdf_pass['nue'].query(SS).index.size)
ratio = float(ts_mdf_pass['nue'].query(SS).index.size) / float(866.)
data = k0_eff * ratio
ax.plot(centers,data,'-',color='blue',lw=3,label='Signal')

kk0 = np.where(k1_rej>0.995)[0][0]
kk1 = np.where(k1_rej>0.999)[0][0]
print
print "99.5"
print "x  =",centers[kk0]
print "y_c=",k1_rej[kk0]
print "y_t=",k0_eff[kk0]*ratio
print
print "99.9"
print "x  =",centers[kk1]
print "y_c=",k1_rej[kk1]
print "y_t=",k0_eff[kk1]*ratio

LLCUT = centers[kk0]
ax.legend()
ax.set_xlabel("Log Likelihood",fontweight='bold')
ax.set_ylabel("Efficiency",fontweight='bold')
ax.legend(loc='best')
ax.set_ylim(0,1.0)
plt.show()



In [None]:
print ts_mdf_pass['cosmic'].query("LL>@LLCUT").index.size
print ts_mdf_pass['nue'].query("LL>@LLCUT & selected1L1P==1 & scedr<5").index.size
print ts_mdf_pass['nue'].query("LL>@LLCUT & selected1L1P==1 & scedr>5").index.size

In [None]:
# True-energy spectra of nue events at three successive selection stages:
#   red   - 1L1P events with a good cROI (first row of each event in dfs['nue'])
#   green - additionally passing the LL cut
#   blue  - additionally with scedr < 5
# `sig`, `reco` and `dX` are read by the efficiency cell that follows.
matplotlib.rcParams['font.size']=20
matplotlib.rcParams['font.family']='serif'

# Hard-coded cut; this overrides whatever LLCUT earlier cells computed.
LLCUT=-14.625
# The handle was previously bound to the misspelled name `fix`, which left
# `fig` pointing at an older figure.
fig,ax=plt.subplots(figsize=(10,6))

Xmin = 200
Xmax = 800
dX   = 50

bins=np.arange(Xmin,Xmax + dX,dX)

# Each stage is drawn as a faint filled histogram plus a solid outline.
SS = 'selected1L1P==1 & good_croi_ctr>0'
data=dfs['nue'].groupby(rse).nth(0).query(SS)['energyInit'].values
ax.hist(data,bins=bins,color='red',histtype='stepfilled',alpha=0.1)
sig = ax.hist(data,bins=bins,color='red',label='1L1P & Good cROI',histtype='step',lw=3)

# The selection string is extended cumulatively for the next two stages.
SS+= '& LL>=@LLCUT'
data=ts_mdf_pass['nue'].query(SS)['energyInit'].values
ax.hist(data,bins=bins,color='green',histtype='stepfilled',alpha=0.1)
ax.hist(data,bins=bins,color='green',label='Pass Nue LL',histtype='step',lw=3)

SS+= '& scedr<5'
data=ts_mdf_pass['nue'].query(SS)['energyInit'].values
ax.hist(data,bins=bins,color='blue',alpha=0.1,histtype='stepfilled')
reco = ax.hist(data,bins=bins,color='blue',label='Pass Nue LL & Good Vertex',histtype='step',lw=3)

ax.set_ylim(0,150)
# Use the binning limits rather than repeating the literals 200 / 800.
ax.set_xlim(Xmin,Xmax)
ax.grid()
ax.legend(loc='upper left',fontsize=18)

ax.set_ylabel("Events",fontweight='bold')
ax.set_xlabel("True Neutrino Energy [MeV]",fontweight='bold')
plt.savefig("00_E.pdf")
plt.show()


In [None]:

# Reconstruction efficiency per true-energy bin: the ratio of the `reco`
# histogram to the `sig` histogram built in the energy-spectrum cell.
fig,ax = plt.subplots(figsize=(10,6))

signal_v = sig[0]                              # denominator counts per bin
param_v  = sig[1][:-1] + float(dX)/2.0         # bin centres (left edge + half width)

# Masked division: bins with an empty denominator get efficiency 0. The
# previous `reco/sig` + nan_to_num turned x/0 into ~1.8e308 and emitted
# divide warnings.
eff_v = np.zeros(signal_v.shape, dtype=float)
np.true_divide(reco[0], signal_v, out=eff_v, where=(signal_v > 0))
reco_sig = eff_v

# Zero-efficiency bins act as separators: the curve is drawn as independent
# runs of consecutive non-zero bins so no line is joined across a gap.
res_v      = np.where(eff_v==0)[0]
zero_bins  = set(res_v.tolist())
eff_vv    = []
signal_vv = []
param_vv  = []
eff_v_    = []
signal_v_ = []
param_v_  = []

for ix in xrange(eff_v.size):
    if ix in zero_bins:
        # Close the current run (possibly empty) and start a new one.
        eff_vv.append(np.array(eff_v_))
        signal_vv.append(np.array(signal_v_))
        param_vv.append(np.array(param_v_))
        eff_v_    = []
        signal_v_ = []
        param_v_  = []
        continue

    eff_v_.append(eff_v[ix])
    signal_v_.append(signal_v[ix])
    param_v_.append(param_v[ix])

# Flush the trailing run.
eff_vv.append(np.array(eff_v_))
signal_vv.append(np.array(signal_v_))
param_vv.append(np.array(param_v_))

for seg_x,seg_eff,seg_n in zip(param_vv,eff_vv,signal_vv):
    ax.plot(seg_x,seg_eff,'o',color='blue',markersize=8)
    # Error bar: sqrt(eff * (1 - eff) / N) with N the denominator count.
    ax.errorbar(seg_x,seg_eff,yerr= np.sqrt( seg_eff * ( 1 - seg_eff ) / seg_n ),lw=2,color='blue')

ax.set_ylabel("Reco. Efficiency",fontweight='bold')
ax.set_xlabel("True Neutrino Energy [MeV]",fontweight='bold')
ax.set_ylim(0,1.0)
plt.grid()
plt.savefig("00_eff.pdf")
plt.show()

In [None]:
# Hard-coded cut; replaces the previous LLCUT for this and all later cells.
LLCUT = -18.25

# One default-binned LL histogram per sample for the events at or above the cut.
for sample in ('nue', 'cosmic'):
    plt.hist(ts_mdf_pass[sample].query("LL>=@LLCUT").LL.values)
    plt.show()

In [None]:
print
print "cosmic"
fout = open("these_cosmic.sh","w+")
for run,subrun,event in ts_mdf_pass['cosmic'].query("LL>=@LLCUT")[['run','subrun','event']].values:
    rse = (run,subrun,event)
    fout.write("python dump_img.py")
    fout.write(" ")
    f=os.path.basename(res.set_index(['run','subrun','event']).loc[rse].fname)
    fout.write(os.path.join("cosmic_in",f))
    fout.write(" ")
    num = int(f.split(".")[0].split("_")[-1])
    fout.write(os.path.join("cosmic_out","out_%d.root" % num))
    fout.write(" ")
    erv = ts_mdf_pass['cosmic'].set_index(['run','subrun','event']).loc[rse][['entry','roid','cvtxid']].values
    fout.write("%d %d %d &" %(erv[0],erv[1],erv[2]))
    fout.write("\n")
    print rse,"\t",f,"%d %d %d &" %(erv[0],erv[1],erv[2])
    
fout.close()
print
print "nue"
fout = open("these_nue.sh","w+")
for run,subrun,event in ts_mdf_pass['nue'].query("LL>=@LLCUT & scedr>5 & selected1L1P==1 & energyInit>200 & energyInit<800")[['run','subrun','event']].values:
    rse = (run,subrun,event)
    fout.write("python dump_img.py")
    fout.write(" ")
    f=os.path.basename(res_nue.set_index(['run','subrun','event']).loc[rse].fname)
    print rse,"\t",f
    fout.write(os.path.join("nue_in",f))
    fout.write(" ")
    num = int(f.split(".")[0].split("_")[-1])
    fout.write(os.path.join("nue_out","out_%d.root" % num))
    fout.write(" ")
    erv = ts_mdf_pass['nue'].set_index(['run','subrun','event']).loc[rse][['entry','roid','cvtxid']].values
    fout.write("%d %d %d &" %(erv[0],erv[1],erv[2]))
    fout.write("\n")
fout.close()

rse=['run','subrun','event']

In [None]:
res = pd.DataFrame(rn.root2array("comb_trk_cosmic.root",treename="EventCosmicTrackTree"))
rse=['run','subrun','event']
res = res.set_index(rse)
print list(res.columns)

In [None]:
nue_group = dfs['nue'].groupby(rse).nth(0).query("selected1L1P==1")
print nue_group.index.size

In [None]:
# Attach the track-tree columns to the nue events, keeping exactly the rows of
# nue_group (events absent from `res` get NaN in the track columns).
# `join_axes=` was removed from pd.concat in pandas 1.0; reindexing the right
# frame first is the equivalent that works on both old and new versions.
res_comb = pd.concat([nue_group, res.reindex(nue_group.index)], axis=1)

In [None]:
### matplotlib.rcParams['font.size']=20
for thing in ['n_top_pts', 'n_bot_pts', 'n_up_pts', 'n_down_pts', 'n_anode_pts', 'n_cathode_pts',
              'num_croi',
              'n_thru_mu_trk', 'n_stop_mu_trk',
              'num_vertex']:
    if thing != 'n_top_pts':
        continue
    print thing
    data0 = np.nan_to_num(res_comb[thing].values)
    data1 = np.nan_to_num(res_comb.query("min_vtx_dist<5 & num_vertex>0")[thing].values)
    data2 = np.nan_to_num(res_comb.query("min_vtx_dist>5 & num_vertex>0")[thing].values)
    bins = np.arange(0,102,2)
    weights0=[1/float(res_comb.index.size)] * data0.size
    #weights1=[1/float(res_comb.index.size)] * data1.size
    #weights2=[1/float(res_comb.index.size)] * data2.size
    
    weights1=[1/float(data1.size)] * data1.size
    weights2=[1/float(data2.size)] * data2.size
    
    #plt.hist(data0,bins=bins,weights=weights0,color='red',histtype='stepfilled',alpha=0.1,lw=3)
    plt.hist(data1,bins=bins,weights=weights1,color='blue',histtype='stepfilled',alpha=0.1,lw=3)
    plt.hist(data2,bins=bins,weights=weights2,color='green',histtype='stepfilled',alpha=0.1,lw=3)
    
    #plt.hist(data0,bins=bins,weights=weights0,color='red',histtype='step',alpha=1.0,label="all",lw=3)
    plt.hist(data1,bins=bins,weights=weights1,color='blue',histtype='step',alpha=1.0,label="good",lw=3)
    plt.hist(data2,bins=bins,weights=weights2,color='green',histtype='step',alpha=1.0,label="bad",lw=3)
    
    fig=plt.gcf()
    fig.set_size_inches(10,6)

    ax=plt.gca()
    ax.set_xlabel("Number of Top Piercing",fontweight='bold')
    ax.set_ylabel("Event Fraction\n1e1p with 1L1P Filter",fontweight='bold')
    ax.legend()
    ax.grid()
    plt.tight_layout()
    ax=plt.gca()
    ax.set_xlim(0,40)
    plt.savefig("/home/vgenty/{}_png".format(thing))
    plt.show()
    plt.clf()
    plt.cla()
    plt.close()

In [None]:
res = pd.DataFrame(rn.root2array("comb_trk_cosmic_extbnb.root",treename="EventCosmicTrackTree"))
rse=['run','subrun','event']
res = res.set_index(rse)

cosmic_group = dfs['cosmic'].groupby(rse).nth(0)
print cosmic_group.index.size

res_comb = pd.concat([cosmic_group,res],axis=1,join_axes=[cosmic_group.index])

In [None]:
matplotlib.rcParams['font.size']=20
for thing in ['n_top_pts', 'n_bot_pts', 'n_up_pts', 'n_down_pts', 'n_anode_pts', 'n_cathode_pts',
              'num_croi',
              'n_thru_mu_trk', 'n_stop_mu_trk',
              'num_vertex']:
    print thing
    #data0 = np.nan_to_num(res_comb[thing].values)
    data1 = np.nan_to_num(res_comb.query("num_vertex>0")[thing].values)
    bins = np.arange(0,102,2)
    #weights0=[1/float(res_comb.index.size)] * data0.size
    weights1=[1/float(res_comb.index.size)] * data1.size
    
    plt.hist(data1,bins=bins,weights=weights1,color='blue',histtype='stepfilled',alpha=0.1,lw=3)
    #plt.hist(data0,bins=bins,weights=weights0,color='red',histtype='stepfilled',alpha=0.1,lw=3)
    
    plt.hist(data1,bins=bins,weights=weights1,color='blue',histtype='step',alpha=1.0,label="...",lw=3)
    #plt.hist(data0,bins=bins,weights=weights0,color='red',histtype='step',alpha=1.0,label="all",lw=3)
    fig=plt.gcf()
    fig.set_size_inches(10,6)
    ax=plt.gca()
    ax.set_xlabel(thing,fontweight='bold')
    ax.set_ylabel("Event Fraction",fontweight='bold')
    ax.legend()
    ax.grid()
    plt.tight_layout()
    #plt.savefig("{}_png".format(thing))
    plt.show()
    plt.clf()
    plt.cla()
    plt.close()

In [None]:
# Number of selected nue events strictly above the current LL cut
# (left as the cell's last expression so the notebook displays it).
nue_above_cut = ts_mdf_pass['nue'].query("LL>@LLCUT ")
nue_above_cut.index.size