In [None]:
%matplotlib inline

import ROOT, sys, os
from ROOT import std

from larcv import larcv
from larlite import larlite as ll
from larlite import larutil as lu

import numpy as np

import scipy
from scipy import stats

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.path as path
import matplotlib.patches as patches

from ROOT import geo2d,cv
from ROOT.cv import Point_ as Vector
# Name of the element type as a string. Not referenced in the visible cells;
# presumably used to instantiate the templated `Vector` (cv.Point_) imported
# just above -- TODO confirm against the cells that use it.
DTYPE='float'

import root_numpy as rn
import pandas as pd

# Singleton handles from larlite's larutil, plus the geo2d drawing helper.
geoh  = lu.GeometryHelper.GetME()
geo   = lu.Geometry.GetME()
larp  = lu.LArProperties.GetME()
pygeo = geo2d.PyDraw()

# Global matplotlib text style for every figure in the notebook.
matplotlib.rcParams.update({
    'font.size'   : 20,
    'font.family' : 'serif',
})

In [None]:
INFILE="out.root"

# Columns that identify one event.
base_index = ['run','subrun','event']

# tree name -> columns that identify one row of that tree
trees_m = {
            'mctree' : base_index,
            'EventTree' : base_index,
            'Vtx3DTree' : base_index + ['vtx3d_id'],
          }

df_m = {}              # every tree, indexed by event
signal_df_m = {}       # signal events only, indexed by event
back_df_m = {}         # background events only, indexed by event
signal_reco_df_m = {}  # signal events, indexed by the tree's own row key
back_reco_df_m = {}    # background events, indexed by the tree's own row key

# Read each tree, drop rows duplicated on the tree's row key, index by event.
# (.items() works on both Python 2 and Python 3, unlike .iteritems().)
for tree_name_, index_ in trees_m.items():
    df = pd.DataFrame(rn.root2array(INFILE,treename=tree_name_))
    df = df.drop_duplicates(subset=index_)
    df_m[tree_name_] = df.set_index(base_index)

# Event keys of signal and of background according to the MC truth tree.
# FIX: the background selection previously repeated 'signal==1', which made
# every background frame identical to the signal frame. Background is now the
# complement of the signal selection.
# NOTE(review): assumes anything not flagged signal==1 is background -- confirm
# against the encoding of the `signal` branch.
signal_mc_idx = df_m['mctree'].query('signal==1').index
back_mc_idx   = df_m['mctree'].query('signal!=1').index

# select signal v. background,
# reindex
for tree_name_, index_ in trees_m.items():
    # .loc is the label-based replacement for the removed .ix indexer
    signal_df = df_m[tree_name_].loc[signal_mc_idx]
    back_df   = df_m[tree_name_].loc[back_mc_idx]

    # selecting with an Index object can drop the level names, restore them
    signal_df.index.names = base_index
    back_df.index.names = base_index

    signal_df_m[tree_name_] = signal_df.copy()
    back_df_m[tree_name_]   = back_df.copy()

# The "reconstructed events only" filter (n_vtx3d>0 on 'Vtx3DEventTree') is
# currently disabled, so the *_reco_* maps hold the same events as the
# signal/background maps; only the index differs: each tree is re-indexed by
# its own row key from trees_m.
for tree_name_, index_ in trees_m.items():
    signal_reco_df_m[tree_name_] = signal_df_m[tree_name_].reset_index().set_index(index_)
    back_reco_df_m[tree_name_]   = back_df_m[tree_name_].reset_index().set_index(index_)

# Choose good vertex candidates

In [None]:
# Re-key the vertex and MC-truth frames by event (run, subrun, event) so that
# both can be looked up with the same key; signal first, then background.
# Generator unpacking is used so no loop variable leaks into the namespace.
s_vtx_tree, s_mc_tree = (
    signal_reco_df_m[tree_].reset_index().set_index(base_index)
    for tree_ in ('Vtx3DTree', 'mctree')
)

b_vtx_tree, b_mc_tree = (
    back_reco_df_m[tree_].reset_index().set_index(base_index)
    for tree_ in ('Vtx3DTree', 'mctree')
)

def pick_good_vertex(sb_mc_tree,sb_vtx_tree,max_dist=7,min_planes=2):
    """Flag, per MC event, whether a reconstructed vertex lies near the truth.

    Parameters
    ----------
    sb_mc_tree : pandas.DataFrame
        One row per event. Reads the columns ``vtx2d_t`` and ``vtx2d_w``
        (true 2D vertex coordinates; presumably one entry per plane --
        TODO confirm).
    sb_vtx_tree : pandas.DataFrame
        Vertex candidates indexed with the same event key as ``sb_mc_tree``
        (several rows per event allowed). Reads ``vtx2d_x_v``, ``vtx2d_y_v``
        and ``vtx3d_id``.
    max_dist : float, optional
        Largest 2D distance (same units as the coordinates) at which a
        candidate counts as close on a plane. Default 7.
    min_planes : int, optional
        Number of planes on which a candidate must be close for the event
        to be flagged good. Default 2.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``sb_mc_tree`` with columns ``good`` (True when at least
        one candidate is close on ``min_planes`` or more planes) and ``idx``
        (``vtx3d_id`` of the candidate with the smallest mean distance to the
        truth, or -1 when the event has no usable candidates).

    Notes
    -----
    ``idx`` is chosen by smallest mean distance, which is not necessarily the
    candidate that satisfied the closeness requirement.
    NOTE(review): a lookup that yields a single Series rather than a
    DataFrame is treated as "no usable candidates", as before -- confirm that
    this is intended for events with exactly one candidate.
    """

    good_vtx_sb_v={}
    good_vtx_id_v={}

    for index, row in sb_mc_tree.iterrows():

        # .loc is the label-based replacement for the removed .ix indexer.
        # An event absent from the vertex tree is handled like an event
        # without candidates instead of aborting the whole scan.
        try:
            vtx_entry = sb_vtx_tree.loc[index]
        except KeyError:
            vtx_entry = None

        if not isinstance(vtx_entry, pd.DataFrame):
            good_vtx_sb_v[index] = False
            good_vtx_id_v[index] = -1
            continue

        # one row per candidate, one column per entry of the 2D coordinate
        # arrays (np.vstack is what the deprecated np.row_stack aliased)
        vtx_x_vv = np.vstack(vtx_entry.vtx2d_x_v.values)
        vtx_y_vv = np.vstack(vtx_entry.vtx2d_y_v.values)

        dx = vtx_x_vv - row.vtx2d_t
        dy = vtx_y_vv - row.vtx2d_w

        dt = np.sqrt(dx*dx + dy*dy)                     # distance from truth to every candidate
        min_idx = dt.mean(axis=1).argmin()              # candidate with the smallest mean distance
        n_close_planes = (dt <= max_dist).sum(axis=1)   # per candidate: planes within max_dist
        has_close_vtx = bool((n_close_planes >= min_planes).any())  # event has >0 close vertices

        good_vtx_sb_v[index] = has_close_vtx
        good_vtx_id_v[index] = vtx_entry.vtx3d_id.values[min_idx]

    good_vtx_sb_v  = pd.Series(good_vtx_sb_v)
    good_vtx_id_v  = pd.Series(good_vtx_id_v)

    sb_vtx_df=pd.DataFrame([good_vtx_sb_v,good_vtx_id_v]).T
    sb_vtx_df.columns=['good','idx']

    return sb_vtx_df

# Evaluate the vertex candidates for the signal and the background samples.
s_vtx_df = pick_good_vertex(s_mc_tree, s_vtx_tree)
b_vtx_df = pick_good_vertex(b_mc_tree, b_vtx_tree)

# The returned frames carry unnamed index levels; label them with the event key.
s_vtx_df.index.names = base_index
b_vtx_df.index.names = base_index

# Good selected vertex dataframe

In [None]:
#get the signal dataframe
def _select_good_vertices(vtx3d_df, vtx_flag_df):
    """Return the rows of `vtx3d_df` that belong to the flagged-good vertices.

    `vtx3d_df` is indexed by base_index + ['vtx3d_id']. `vtx_flag_df` is
    indexed by event and has the two columns ('good', 'idx') in that order,
    as produced by pick_good_vertex; 'idx' holds the chosen vtx3d_id.
    """
    flags = vtx_flag_df.reset_index()
    # positional rename: event key columns, then 'good', then the chosen id
    flags.columns = base_index + ['good', 'vtx3d_id']
    flags = flags.set_index(base_index + ['vtx3d_id']).query("good==1.0")
    # .loc is the label-based replacement for the removed .ix indexer
    return vtx3d_df.loc[flags.index]

#get the signal dataframe
sig_vtx3d=signal_df_m["Vtx3DTree"].reset_index().set_index(base_index + ["vtx3d_id"])
#get the background dataframe
back_vtx3d=back_df_m["Vtx3DTree"].reset_index().set_index(base_index + ["vtx3d_id"])

#......signal........
sig_good_vtx_df=_select_good_vertices(sig_vtx3d, s_vtx_df)

#......background........
back_good_vtx_df=_select_good_vertices(back_vtx3d, b_vtx_df)

In [None]:
# Single-argument print(...) behaves identically on Python 2 and is valid on Python 3.
# number of rows in the signal MC truth frame
print(signal_df_m['mctree'].index.size)
# number of distinct (run, subrun, event) groups in the signal vertex frame
print(len(signal_df_m['Vtx3DTree'].reset_index().groupby(base_index)))
# number of rows selected as good signal vertices
print(sig_good_vtx_df.index.size)