# Reference
[uproot documentation](https://uproot.readthedocs.io/en/latest/)

In [None]:
def make_sequential(obj):
    """Replace ``obj.event`` with a running event index and set ``obj.file``.

    ``obj.event`` is read as a 1-D array of event numbers, one entry per row.
    Two zero-based counters are derived from it:

    * ``obj.event`` -- incremented every time the event number differs from
      the one on the previous row.
    * ``obj.file`` -- incremented every time the event number is *smaller*
      than the one on the previous row, which is taken as the start of a new
      input file.

    Both outputs have the same shape and dtype as the input. The object is
    modified in place; nothing is returned. An empty input yields two empty
    arrays.
    """
    events = np.asarray(obj.event)
    seq_events = np.zeros_like(events)
    seq_files = np.zeros_like(events)

    if events.size > 1:
        previous, current = events[:-1], events[1:]
        # Row 0 always belongs to event 0 of file 0; each later row adds one
        # for every boundary crossed so far.
        seq_events[1:] = np.cumsum(current != previous)
        seq_files[1:] = np.cumsum(current < previous)

    obj.event = seq_events
    obj.file = seq_files

In [None]:
import uproot, numpy as np

class MCValidation:
    """In-memory copy of the branches of an MC validation tree.

    Every branch is read as a NumPy array (``library="np"``) and stored as an
    attribute. After loading, ``make_sequential`` rewrites ``self.event`` and
    ``self.file`` as running indices; ``self.orig_event`` keeps the event
    numbers exactly as stored in the file.
    """

    def __init__(self, filename, treename):
        """Load all validation branches from ``treename`` inside ``filename``.

        Args:
            filename: Path passed to ``uproot.open``.
            treename: Key of the tree inside the opened file.
        """
        # The context manager closes the file even if a branch lookup raises.
        with uproot.open(filename) as file:
            tree = file[treename]

            def read(branch):
                """Return one branch of the tree as a NumPy array."""
                return tree[branch].array(library="np")

            self.orig_event = read("event")  # actual event number
            # Independent copy; replaced by a linear event index below.
            self.event = self.orig_event.copy()
            self.file = np.zeros_like(self.event)
            self.mc_id = read("mcId")  # number of mc particles in event
            self.mc_pdg = read("mcPDG")  # pdg code of particles in each event
            self.mc_tier = read("mcTier")  # which tier each event is folded back to
            self.mc_nhits = read("mcNHits")  # number of mc particle hits in event

            # The original author marked the three environment flags as
            # "not functioning".
            # NOTE(review): "isNuInteration" looks like a typo of
            # "isNuInteraction" -- confirm against the tree before renaming.
            is_nu_int = read("isNuInteration")
            is_cr_int = read("isCosmicRay")
            is_tb_int = read("isTestBeam")

            self.is_leading_lepton = read("isLeadingLepton")
            self.is_michel = read("isMichel")
            self.n_matches = read("nMatches")
            self.reco_id_list = read("recoIdVector")
            self.reco_nhits_list = read("nRecoHitsVector")
            self.shared_nhits_list = read("nSharedHitsVector")
            self.purity_adc_list = read("purityAdcVector")
            self.purity_list = read("purityVector")
            self.purity_list_u = read("purityVectorU")
            self.purity_list_v = read("purityVectorV")
            self.purity_list_w = read("purityVectorW")
            self.purity_adc_list_u = read("purityAdcVectorU")
            self.purity_adc_list_v = read("purityAdcVectorV")
            self.purity_adc_list_w = read("purityAdcVectorW")
            self.completeness_list = read("completenessVector")
            self.completeness_adc_list = read("completenessAdcVector")
            self.completeness_list_u = read("completenessVectorU")
            self.completeness_list_v = read("completenessVectorV")
            self.completeness_list_w = read("completenessVectorW")
            self.completeness_adc_list_u = read("completenessAdcVectorU")
            self.completeness_adc_list_v = read("completenessAdcVectorV")
            self.completeness_adc_list_w = read("completenessAdcVectorW")

        # Two-character label per entry; "??" when no flag is set. Later
        # assignments win when more than one flag is set for the same entry.
        self.environment = np.full(is_nu_int.shape, "??")
        self.environment[np.where(is_nu_int)] = "nu"
        self.environment[np.where(is_cr_int)] = "cr"  # cosmic ray
        self.environment[np.where(is_tb_int)] = "tb"  # test beam

        # Element-wise product of ADC-weighted purity and completeness.
        self.pc_metric = self.purity_adc_list * self.completeness_adc_list
        make_sequential(self)

In [None]:
import os

def save_plot(fig, filename, subdir=None):
    """Save ``fig`` as PNG, SVG and EPS under ``images/<type>/<subdir>/``.

    Args:
        fig: Object exposing ``savefig(path, dpi=...)``.
        filename: File name without extension; the image type is appended.
        subdir: Optional sub-directory below each ``images/<type>`` folder.
            It may be nested (``"a/b"``); leading slashes are ignored so the
            path always stays below ``images``.

    Missing directories are created on demand, including intermediate ones.
    """
    subdir = "" if subdir is None else subdir.lstrip("/")

    for img_type in ("png", "svg", "eps"):
        parts = ["images", img_type]
        if subdir:
            parts.append(subdir)
        target_dir = os.path.join(*parts)
        # makedirs builds every missing level and tolerates existing ones,
        # unlike the non-recursive os.mkdir.
        os.makedirs(target_dir, exist_ok=True)
        fig.savefig(os.path.join(target_dir, f"{filename}.{img_type}"), dpi=200)

# Reading data

In [None]:
# Load every validation branch of the "mc" tree from hierarchy_mc.root into memory.
validation = MCValidation("hierarchy_mc.root", "mc")

In [None]:
# PDG code of the particle species to study (11 = electron). The selection
# below compares against abs(mc_pdg), so the antiparticle is included too.
pdg = 11

In [None]:
# Pick out the MC particles whose |PDG code| equals the species chosen above.
idx = np.where(np.abs(validation.mc_pdg) == pdg)
# temp to test
#idx = np.where((abs(validation.mc_pdg) == pdg) & ((validation.mc_nhits >= 14) & (validation.mc_nhits <= 15)))

# Restrict the per-particle quantities to that selection.
mc_nhits = validation.mc_nhits[idx]
n_matches = validation.n_matches[idx]
purity = validation.purity_list[idx]
completeness = validation.completeness_list[idx]

# Reduce each particle's list of matches to a single value: the match with
# the highest completeness. Particles without any match get index -1 and a
# completeness and purity of 0.
index_array = [-1 if len(matches) == 0 else np.argmax(matches) for matches in completeness]
completeness_unique_array = np.array([0 if len(matches) == 0 else np.max(matches) for matches in completeness])
purity_unique_array = np.array([0 if best == -1 else per_match[best] for per_match, best in zip(purity, index_array)])

In [None]:
def prepare_plot(metric, mc_nhits, bin_width=3, metric_type='purity_completeness'):
    """Average ``metric`` in bins of true hit count, ready for ``errorbar``.

    Bins are ``[k * bin_width, (k + 1) * bin_width)`` for ``k = 0, 1, ...`` up
    to the largest value in ``mc_nhits``. Bins without entries are dropped.

    Args:
        metric: 1-D array with one value per particle.
        mc_nhits: 1-D integer array of hit counts, aligned with ``metric``.
        bin_width: Width of each bin in hits.
        metric_type: ``'purity_completeness'`` (case-insensitive) gives zero
            errors; any other value gives the binomial error
            ``sqrt(p * (1 - p) / n)`` of the bin mean ``p`` over ``n`` entries.

    Returns:
        Tuple ``(x_axis, y_array, error_array)``: the lower edge of every
        populated bin, the mean of ``metric`` in that bin, and a
        ``(2, n_bins)`` array holding the lower and upper errors. Empty input
        yields empty arrays of matching shape.
    """
    metric = np.asarray(metric)
    mc_nhits = np.asarray(mc_nhits)
    if mc_nhits.size == 0:
        return np.empty(0), np.empty(0), np.empty((2, 0))

    binomial_errors = metric_type.lower() != 'purity_completeness'
    highest_hit = int(np.max(mc_nhits))

    lower_edges, means, errors = [], [], []
    # One selection per bin, rather than one per possible hit count.
    for lower_edge in range(0, highest_hit + 1, bin_width):
        in_bin = (mc_nhits >= lower_edge) & (mc_nhits < lower_edge + bin_width)
        selection = metric[in_bin]
        if selection.size == 0:
            # Skip empty bins outright instead of marking them with a sentinel.
            continue
        mean = np.mean(selection)
        if binomial_errors:
            error = np.sqrt(mean * (1 - mean) / selection.size)
        else:
            error = 0.0
        lower_edges.append(lower_edge)
        means.append(mean)
        errors.append(error)

    x_axis = np.array(lower_edges, dtype=float)
    y_array = np.array(means, dtype=float)
    symmetric_error = np.array(errors, dtype=float)
    # Lower and upper errors are identical; stack them as the two rows that
    # matplotlib's errorbar accepts for asymmetric errors.
    error_array = np.vstack((symmetric_error, symmetric_error))

    return x_axis, y_array, error_array

In [None]:
import matplotlib.pyplot as plt

def plot_metric(x, y, err, metric="completeness"):
    """Draw ``y`` against ``x`` as red error-bar points and show the figure.

    ``err`` is passed as the vertical error. The horizontal error is half of
    ``bin_width``, which is read from the enclosing (notebook) scope rather
    than passed in. ``metric`` is used for the y-axis label and, title-cased,
    for the plot title.
    """
    point_style = dict(
        fmt='o',
        markersize='2',
        color='r',
        elinewidth=0.5,
        label="Electron reconstruction",
    )
    half_bin = bin_width / 2

    plt.figure(figsize=(20, 15))
    plt.errorbar(x, y, xerr=half_bin, yerr=err, **point_style)
    plt.title(metric.title())
    plt.xlabel("Num true hits")
    plt.ylabel(metric)
    plt.ylim(0, 1.01)
    # Changes the default size for figures created after this call.
    plt.rcParams["figure.figsize"] = (13, 8)
    plt.show()

In [None]:
# Bin width in true hits. It is passed to prepare_plot here and also read as
# a notebook-level name by plot_metric for the horizontal error bars.
bin_width = 3
# Mean best-match completeness per hit bin.
x, y, err = prepare_plot(completeness_unique_array, mc_nhits, bin_width)
plot_metric(x, y, err, "completeness")

In [None]:
# Mean purity of the most complete match per hit bin.
x, y, err = prepare_plot(purity_unique_array, mc_nhits, bin_width)
plot_metric(x, y, err, "purity")

In [None]:
# A particle counts as reconstructed when its best match reaches at least 0.5
# in both completeness and purity.
efficiency_unique_array = np.logical_and(completeness_unique_array >= 0.5, purity_unique_array >= 0.5)

In [None]:
# Fraction of reconstructed particles per hit bin; metric_type="efficiency"
# selects the binomial error branch of prepare_plot.
x, y, err = prepare_plot(efficiency_unique_array, mc_nhits, bin_width, metric_type="efficiency")
plot_metric(x, y, err, "efficiency")