In [None]:
%matplotlib inline

import ROOT, sys, os
from ROOT import std

from larcv import larcv
from larlite import larlite as ll
from larlite import larutil as lu

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.path as path
import matplotlib.patches as patches

from ROOT import geo2d,cv
from ROOT.cv import Point_ as Vector
DTYPE='float'

import root_numpy as rn
import pandas as pd

# Handles obtained from the larlite utility classes' GetME() accessors, plus a
# geo2d drawing helper. None of them is referenced again in the visible part of
# this notebook; the calls are kept in their original order.
geoh  = lu.GeometryHelper.GetME()
geo   = lu.Geometry.GetME()
larp  = lu.LArProperties.GetME()
pygeo = geo2d.PyDraw()

# Global figure styling: serif text at font size 20 for every plot below.
matplotlib.rcParams.update({'font.size': 20, 'font.family': 'serif'})

In [None]:
INFILE="../test.root"


def _load_tree(treename_, keys_):
    """Read one tree of INFILE into a DataFrame, keeping the first row per key combination."""
    tree_df = pd.DataFrame(rn.root2array(INFILE,treename=treename_))
    return tree_df.drop_duplicates(subset=keys_)


# Columns identifying one event.
event_keys_ = ['run','subrun','event']

reco_event_df = _load_tree('EventTree', event_keys_)

# Per-vertex and per-particle trees are currently not loaded.
#reco_vtx3d_df    = _load_tree('Vtx3DTree',    event_keys_+['vtx3d_id'])
#reco_particle_df = _load_tree('ParticleTree', event_keys_+['vtx3d_id','plane_id'])

reco_defect_df        = _load_tree('DefectDefectTree', event_keys_)
reco_cluster_event_df = _load_tree('ClusterEventTree', event_keys_)
# The plane-level tree is deduplicated per (event, plane) rather than per event.
reco_cluster_plane_df = _load_tree('ClusterPlaneTree', event_keys_+['plane'])

mc_df = _load_tree('mctree', event_keys_)

# Split the MC table on its `signal` flag: 1 -> signal sample, 0 -> background sample.
is_signal_ = (mc_df.signal==1)
is_back_   = (mc_df.signal==0)
signal_mc_df = mc_df[is_signal_].copy().reindex()
back_mc_df   = mc_df[is_back_].copy().reindex()

# Number of events in each sample.
print(signal_mc_df.index.size)
print(back_mc_df.index.size)

In [None]:
# check if there is a good vertex or not
def correct(reco_vtx3d_,group_,chosen_idx_,max_dist_=7):
    """Tell whether one reconstructed vertex lies close to the MC vertex.

    The circle-vertex coordinates stored at position ``chosen_idx_`` of
    ``reco_vtx3d_`` are compared with the ``vtx2d_t`` / ``vtx2d_w`` values of
    the first row of ``group_``. The vertex is accepted when the two smallest
    distances are both within ``max_dist_``.

    Parameters
    ----------
    reco_vtx3d_ : object exposing ``circle_vtx_x_v`` and ``circle_vtx_y_v``
        columns (e.g. a pandas DataFrame). Presumably each cell holds one
        coordinate per plane -- TODO confirm against the tree definition.
    group_ : object exposing ``vtx2d_t`` and ``vtx2d_w`` columns; only the
        first row is read.
    chosen_idx_ : int
        Positional index of the reconstructed vertex to test.
    max_dist_ : number, optional
        Largest accepted distance, in the units of the stored coordinates.
        Defaults to 7, the value that used to be hard-coded.

    Returns
    -------
    bool
        True when at least two distances exist and the two smallest are both
        ``<= max_dist_``; False otherwise (including when fewer than two
        distances are available, which previously raised IndexError).
    """

    ### use projected vertex

    # recox = reco_vtx3d_.vtx2d_x_v.values[chosen_idx_]
    # recoy = reco_vtx3d_.vtx2d_y_v.values[chosen_idx_]

    ### use circle vertex

    recox = reco_vtx3d_.circle_vtx_x_v.values[chosen_idx_]
    recoy = reco_vtx3d_.circle_vtx_y_v.values[chosen_idx_]

    mcx = group_.vtx2d_t.values[0]
    mcy = group_.vtx2d_w.values[0]

    # atleast_1d keeps a scalar distance indexable; np.sort returns an ascending copy.
    dist = np.sort(np.atleast_1d(np.sqrt(np.power(recox-mcx,2)+np.power(recoy-mcy,2))))

    # Two distances are required for a match; with fewer there is nothing to compare.
    if dist.size < 2:
        return False

    return bool(dist[0] <= max_dist_ and dist[1] <= max_dist_)
    

def do_selection(mc_sdf,reco_event_df,reco_vtx3d_df):
    """Return the ``mc_sdf`` index labels of events with a well-reconstructed vertex.

    For every (run, subrun, event) in ``mc_sdf`` the matching row of
    ``reco_event_df`` is looked up. If it reports at least one 3D vertex, the
    vertices in ``reco_vtx3d_df`` are tested one by one with ``correct`` until
    one passes.

    Parameters
    ----------
    mc_sdf : pandas.DataFrame
        MC table with ``run``, ``subrun``, ``event`` columns plus whatever
        ``correct`` reads from it.
    reco_event_df : pandas.DataFrame
        Per-event reco table with ``run``, ``subrun``, ``event`` and
        ``n_vtx3d`` columns.
    reco_vtx3d_df : pandas.DataFrame
        Per-vertex reco table with ``run``, ``subrun``, ``event`` columns plus
        whatever ``correct`` reads from it.

    Returns
    -------
    numpy.ndarray
        Index label (first row) of each selected event, in group order.
    """

    selected = []

    # Grouping on the key columns keeps the original index labels on each
    # group, so the selected label can be read off the group directly instead
    # of re-querying mc_sdf for every event.
    for name, group in mc_sdf.groupby(['run','subrun','event']):

        query_='run=={}&subrun=={}&event=={}'.format(name[0],name[1],name[2])

        reco_event = reco_event_df.query(query_)

        # No reco entry at all for this MC event: it cannot be selected.
        if reco_event.index.size == 0:
            continue

        n_vtx3d = int(reco_event.n_vtx3d.values[0])
        if n_vtx3d <= 0:
            continue

        reco_vtx3d = reco_vtx3d_df.query(query_)

        # The event table and the vertex table may disagree on the number of
        # vertices; never index past the rows that are actually present.
        n_check = min(n_vtx3d, reco_vtx3d.index.size)

        # any() stops at the first vertex that passes, like the original early break.
        if any(correct(reco_vtx3d,group,idx_) for idx_ in range(n_check)):
            selected.append(group.index.values[0])

    return np.array(selected)

# Reconstruction Efficiency on Signal

In [None]:
# Sample whose reconstruction efficiency is evaluated; swap the two lines to
# run on the background sample instead. Note that this rebinds mc_df.
mc_df=signal_mc_df
#mc_df=back_mc_df

# do_selection needs the per-vertex table, but its load is commented out in the
# data-loading cell. Read it here so this cell also runs on a fresh kernel.
reco_vtx3d_df = pd.DataFrame(rn.root2array(INFILE,treename='Vtx3DTree'))
reco_vtx3d_df = reco_vtx3d_df.drop_duplicates(subset=['run','subrun','event','vtx3d_id'])

n_reco_v = do_selection(mc_df,reco_event_df,reco_vtx3d_df)

# Guard against an empty sample instead of dividing by zero.
n_total_ = mc_df.index.size
efficiency_ = float(n_reco_v.size)/float(n_total_) if n_total_ else float('nan')

# Single-argument print calls give the same text on Python 2 and Python 3.
print("Reco Efficiency is  {} / {} =  {}".format(n_reco_v.size,n_total_,efficiency_))
print(n_reco_v)

In [None]:
# Index by run,subrun,event
index_=['run','subrun','event']

# MC truth tables keyed by event; their indices select the matching reco rows below.
sig_mc_df = signal_mc_df.set_index(index_)
bak_mc_df = back_mc_df.set_index(index_)

# set_index returns a new frame, so the reco_* tables are left untouched and no
# explicit copy is needed. Rows are selected by label with .loc, which replaces
# the deprecated .ix indexer.
# NOTE(review): how labels missing from a reco table are treated by .loc
# depends on the installed pandas version -- confirm every MC event has reco rows.

#Signal dataframes
# sig_event_df    = reco_event_df.set_index(index_).loc[sig_mc_df.index]
# sig_vtx3d_df    = reco_vtx3d_df.set_index(index_).loc[sig_mc_df.index]
# sig_particle_df = reco_particle_df.set_index(index_).loc[sig_mc_df.index]
sig_defect_df        = reco_defect_df.set_index(index_).loc[sig_mc_df.index]
sig_cluster_event_df = reco_cluster_event_df.set_index(index_).loc[sig_mc_df.index]
sig_cluster_plane_df = reco_cluster_plane_df.set_index(index_).loc[sig_mc_df.index]

#Background dataframes
# bak_event_df    = reco_event_df.set_index(index_).loc[bak_mc_df.index]
# bak_vtx3d_df    = reco_vtx3d_df.set_index(index_).loc[bak_mc_df.index]
# bak_particle_df = reco_particle_df.set_index(index_).loc[bak_mc_df.index]
bak_defect_df        = reco_defect_df.set_index(index_).loc[bak_mc_df.index]
bak_cluster_event_df = reco_cluster_event_df.set_index(index_).loc[bak_mc_df.index]
bak_cluster_plane_df = reco_cluster_plane_df.set_index(index_).loc[bak_mc_df.index]


In [None]:
# Ratio defect_dist / defect_dist_start_end, background (blue) vs signal (red),
# each histogram normalised to unit area.
# np.nan_to_num turns NaN ratios into 0; the bins start at 0.01, so those
# entries fall outside the plotted range.
# NOTE(review): newer matplotlib releases replace `normed` with `density` --
# switch the keyword if the installed version supports it.
bins_ = np.arange(0.01,0.2,0.005)
plt.hist(np.nan_to_num(bak_defect_df.defect_dist/bak_defect_df.defect_dist_start_end),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_defect_df.defect_dist/sig_defect_df.defect_dist_start_end),
        color='red',  alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title('dist ratio')
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# plt.hist(np.nan_to_num(bak_defect_df.defect_dist), 
#         color='blue', alpha=0.8, bins=np.arange(0,300,5),normed=True)
# plt.hist(np.nan_to_num(bak_defect_df.defect_dist_start_end), 
#         color='red', alpha=0.8, bins=np.arange(0,300,5),normed=True)
# plt.plot()

In [None]:
# plt.hist(np.nan_to_num(sig_defect_df.defect_dist), 
#         color='blue', alpha=0.8, bins=np.arange(0,300,5),normed=True)
# plt.hist(np.nan_to_num(sig_defect_df.defect_dist_start_end), 
#         color='red', alpha=0.8, bins=np.arange(0,300,5),normed=True)
# plt.plot()

In [None]:
# defect_dist, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 1, so entries that np.nan_to_num set to 0 are not drawn.
bins_ = np.arange(1,80,2)
plt.hist(np.nan_to_num(bak_defect_df.defect_dist),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_defect_df.defect_dist),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("defect dist")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# defect_dist_start_end, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 1, so entries that np.nan_to_num set to 0 are not drawn.
bins_ = np.arange(1,500,20)
plt.hist(np.nan_to_num(bak_defect_df.defect_dist_start_end),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_defect_df.defect_dist_start_end),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("defect dist start2end(long side)")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_hip_length, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,60,1)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_hip_length),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_hip_length),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest hip length")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_mip_length, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 4, so entries that np.nan_to_num set to 0 are not drawn.
bins_ = np.arange(4,500,20)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_mip_length),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_mip_length),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest mip length")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_hip_width, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,20,0.5)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_hip_width),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_hip_width),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest hip width")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_mip_width, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,25,1)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_mip_width),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_mip_width),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest mip width")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_hip_qsum, background (blue) vs signal (red), drawn twice: once with
# bins starting at 0 (zeros, including NaNs mapped to 0 by np.nan_to_num, are
# counted) and once with bins starting at 1 (zeros are left out of the plot and
# of the normalisation).
for first_edge_, title_ in ((0, "avg longest hip qsum, (incluing zeros)"),
                            (1, "avg longest hip qsum, (excluing zeros)")):
    bins_ = np.arange(first_edge_,15000,300)
    plt.figure()
    plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_hip_qsum),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_hip_qsum),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title(title_)
    plt.xticks(rotation='vertical')
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# avg_long_hip_qavg, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,200,5)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_hip_qavg),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_hip_qavg),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest hip qavg")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_mip_qsum, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 50, so entries that np.nan_to_num set to 0 are not drawn.
bins_ = np.arange(50,30000,500)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_mip_qsum),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_mip_qsum),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest mip qsum")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_mip_qavg, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,40,2)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_mip_qavg),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_mip_qavg),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest mip qavg")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# avg_long_hip_npx, background (blue) vs signal (red), drawn twice: once with
# bins starting at 0 (zeros, including NaNs mapped to 0 by np.nan_to_num, are
# counted) and once with bins starting at 1 (zeros are left out of the plot and
# of the normalisation).
for first_edge_, title_ in ((0, "avg longest hip npx, (including 0s)"),
                            (1, "avg longest hip npx, (excluding 0s)")):
    bins_ = np.arange(first_edge_,40,2)
    plt.figure()
    plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_hip_npx),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_hip_npx),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title(title_)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# avg_long_mip_npx, background (blue) vs signal (red), each normalised to unit area.
# Bins start at 0, so NaN entries (mapped to 0 by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,1000,50)
plt.hist(np.nan_to_num(bak_cluster_event_df.avg_long_mip_npx),
        color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
plt.hist(np.nan_to_num(sig_cluster_event_df.avg_long_mip_npx),
        color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
plt.title("avg longest mip npx")
plt.legend()
# plt.plot() without arguments draws nothing; plt.show() renders the figure.
plt.show()

# The plots below are made separately for each plane

In [None]:
# long_hip_length per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 1, so zeros are not drawn.
bins_ = np.arange(1,60,2)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_length),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_length),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_length, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_mip_length per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 1, so zeros are not drawn.
bins_ = np.arange(1,500,20)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_mip_length),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_mip_length),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_mip_length, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_hip_width per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 0, so NaN entries (mapped to 0
# by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,12,0.5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_width),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_width),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_width, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_mip_width per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 0, so NaN entries (mapped to 0
# by np.nan_to_num) are counted in the first bin.
bins_ = np.arange(0,20,0.5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_mip_width),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_mip_width),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_mip_width, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# TODO: open question -- why does the long-hip qsum per plane not agree with
# the sum over the 3 planes?
# long_hip_qsum per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 10, so zeros are not drawn.
bins_ = np.arange(10,1500,50)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_qsum),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_qsum),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_qsum, plane %s'%x)
    plt.xticks(rotation = 'vertical')
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_hip_qavg per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 10, so zeros are not drawn.
bins_ = np.arange(10,200,5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_qavg),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_qavg),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_qavg, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_hip_npx per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 10, so zeros are not drawn.
# NOTE(review): these bin edges (10..200, step 5) are the ones used for
# long_hip_qavg, whereas the event-level hip npx plot uses 0..40 -- confirm
# the range is intended for a pixel count.
bins_ = np.arange(10,200,5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_npx),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_npx),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_npx, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_mip_npx per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area. Bins start at 10, so zeros are not drawn.
# NOTE(review): these bin edges (10..200, step 5) are the ones used for
# long_hip_qavg, whereas the event-level mip npx plot uses 0..1000 -- confirm
# the range is intended for a pixel count.
bins_ = np.arange(10,200,5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_mip_npx),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_mip_npx),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_mip_npx, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_hip_angle per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area.
# NOTE(review): np.nan_to_num maps NaN angles to 0, which lies inside the
# -90..90 range, so missing values are counted as 0 in the central bin.
bins_ = np.arange(-90,90,5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_hip_angle),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_hip_angle),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_hip_angle, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()

In [None]:
# long_mip_angle per plane (0, 1, 2): background (blue) vs signal (red),
# each normalised to unit area.
# NOTE(review): np.nan_to_num maps NaN angles to 0, which lies inside the
# -90..90 range, so missing values are counted as 0 in the central bin.
bins_ = np.arange(-90,90,5)
for x in range(3):
    # Rows of this plane only; the query string reads the loop variable x.
    bak_plane_ = bak_cluster_plane_df.query('plane==@x')
    sig_plane_ = sig_cluster_plane_df.query('plane==@x')
    plt.figure()
    plt.hist(np.nan_to_num(bak_plane_.long_mip_angle),
            color='blue', alpha=0.8, bins=bins_, normed=True, label='background')
    plt.hist(np.nan_to_num(sig_plane_.long_mip_angle),
            color='red', alpha=0.8, bins=bins_, normed=True, label='signal')
    plt.title('long_mip_angle, plane %s'%x)
    plt.legend()
    # plt.plot() without arguments draws nothing; plt.show() renders the figure.
    plt.show()