In [1]:
import uproot
import awkward as ak
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import pandas as pd

from cfg.hnl_mva_tools import read_json_file
from data_tools.load_data import read_files_and_open_trees, filter_trees
from plot_tools.plot_vars_dist_tools import plot_var_dist, load_sig_data, load_bkg_data, load_bkg_data2, plot_var_dist_collapse_bkg

In [2]:
# Configuration: input JSON files and the root directory for all plots.
ntuples_json = "cfg/ntuples.json"
vars_json = "cfg/vars_new.json"
plots_dir = "../vars_dist_plots"

# Open the signal and background trees described by the two JSON files.
(
    sig_trees,
    bkg_trees,
    good_vars,
    sig_labels,
    bkg_labels
) = read_files_and_open_trees(ntuples_json, vars_json)

# Parse the variables file once and pull every needed entry from that single
# result, instead of re-reading the same file for each key.
vars_cfg = read_json_file(vars_json)
full_vars = vars_cfg["vars"]
training_vars = vars_cfg["training_vars"]
scale_factor_vars = vars_cfg["scale_factors"]

# Name of the weight entry, as declared in the ntuples configuration.
weight_name = read_json_file(ntuples_json)["weight_name"]



In [3]:
# Load the background variables and their weights.
backgrounds, backgrounds_weight = load_bkg_data2(
    bkg_trees, full_vars, weight_name, scale_factor_vars
)

# The column list is taken from the first background and reused for all of them.
bkg_keys = list(backgrounds[0].keys())

# One DataFrame per background, all sharing the same column order.
bkg_dfs = [pd.DataFrame(bkg_sample, columns=bkg_keys) for bkg_sample in backgrounds]

# Attach the matching weights to each background DataFrame as a 'weight' column.
for idx, bkg_df in enumerate(bkg_dfs):
    bkg_df['weight'] = backgrounds_weight[idx]

#### USE THIS FOR SPECIFIC PLOTS

In [4]:
# Keep only the signal trees (and their labels) matching the selected
# mass and ctau tags.
mass_list = ["mN1p5"]
ctau_list = ["ctau10", "ctau100", "ctau1000"]
my_sig_trees, my_sig_labels = filter_trees(
    sig_trees,
    sig_labels,
    mass_list=mass_list,
    ctau_list=ctau_list,
)

In [5]:
# USE THIS FOR SPECIFIC PLOTS
#
# Builds one DataFrame per selected signal sample, then draws each variable in
# `my_vars` with all selected signals against the backgrounds in a single call
# to plot_var_dist_collapse_bkg.
category_list = [1,2,3,4,5,6]
category_var = "C_category"

# The per-category plots were disabled in this cell (the category loop used to
# begin with a bare `break`). Set this flag to True to produce them.
plot_per_category = False

my_vars = ["C_Hnl_vertex_2DSig_BS"]
my_sig_dfs = []

# Output directory for the plots; exist_ok avoids the separate existence check.
out_dir = f"{plots_dir}/my_plots"
os.makedirs(out_dir, exist_ok=True)

for my_sig_tree, my_sig_label in zip(my_sig_trees, my_sig_labels):
    sig, sig_weight = load_sig_data(my_sig_tree, full_vars, scale_factor_vars)
    sig_keys = list(sig.keys())
    sig_df = pd.DataFrame(sig, columns=sig_keys)
    # Add weights to sig DataFrame
    sig_df['weight'] = sig_weight
    my_sig_dfs.append(sig_df)

    if not plot_per_category:
        continue

    for category in category_list:
        # Use a dedicated path per category. Appending to `out_dir` itself would
        # nest the directories (cat_1/cat_2/...) and change where the combined
        # plots below are written.
        # NOTE(review): every signal shares this directory; confirm that
        # plot_var_dist puts the signal label in the file name.
        cat_out_dir = f"{out_dir}/cat_{category}"
        os.makedirs(cat_out_dir, exist_ok=True)

        # Mask away data points that are not in the category.
        sig_df_masked = sig_df[sig_df[category_var] == category]
        bkg_dfs_masked = [bkg_df[bkg_df[category_var] == category] for bkg_df in bkg_dfs]
        masked_bkg_weights = [masked_df['weight'] for masked_df in bkg_dfs_masked]

        # Plot the variables for this category.
        for var in full_vars:
            masked_bkg_values = [masked_df[var] for masked_df in bkg_dfs_masked]
            plot_var_dist(
                sig_df_masked[var],
                masked_bkg_values,
                sig_df_masked['weight'],
                masked_bkg_weights,
                my_sig_label,
                bkg_labels,
                var,
                cat_out_dir,
            )


# Weights do not depend on the plotted variable, so build the lists once.
# Cell-local names are used so the `backgrounds` / `backgrounds_weight` results
# of the background-loading cell are not overwritten.
signals_weight = [loaded_sig_df['weight'] for loaded_sig_df in my_sig_dfs]
bkg_weights = [bkg_df['weight'] for bkg_df in bkg_dfs]

# Plot the selected variables: all signals together against the backgrounds.
for var in my_vars:
    signals = [loaded_sig_df[var] for loaded_sig_df in my_sig_dfs]
    bkg_values = [bkg_df[var] for bkg_df in bkg_dfs]
    plot_var_dist_collapse_bkg(
        signals,
        bkg_values,
        signals_weight,
        bkg_weights,
        my_sig_labels,
        bkg_labels,
        var,
        out_dir,
    )



Loading Signal Variables...
Signal Variables Loaded!
Loading Signal Variables...
Signal Variables Loaded!
Loading Signal Variables...
Signal Variables Loaded!


#### USE THIS FOR ALL PLOTS

In [None]:
# Keep only the signal trees (and their labels) matching the selected
# mass and ctau tags.
mass_list = ["mN1p0", "mN1p5"]
ctau_list = ["ctau10"]
my_sig_trees, my_sig_labels = filter_trees(
    sig_trees,
    sig_labels,
    mass_list=mass_list,
    ctau_list=ctau_list,
)

In [None]:
# For every selected signal sample, plot each variable in `full_vars` against
# the backgrounds, writing into a directory named after the signal label.
category_list = [1,2,3,4,5,6]
category_var = "C_category"

# The per-category plots were disabled in this cell (the category loop used to
# begin with a bare `break`). Set this flag to True to produce them.
plot_per_category = False

# Background weights depend on neither the signal nor the variable, so build
# the list once. A cell-local name is used so the `backgrounds_weight` result
# of the background-loading cell is not overwritten.
bkg_weights = [bkg_df['weight'] for bkg_df in bkg_dfs]

for my_sig_tree, my_sig_label in zip(my_sig_trees, my_sig_labels):
    sig, sig_weight = load_sig_data(my_sig_tree, full_vars, scale_factor_vars)
    sig_keys = list(sig.keys())
    sig_df = pd.DataFrame(sig, columns=sig_keys)
    # Add weights to sig DataFrame
    sig_df['weight'] = sig_weight

    if plot_per_category:
        for category in category_list:
            cat_out_dir = f"{plots_dir}/{my_sig_label}/cat_{category}"
            os.makedirs(cat_out_dir, exist_ok=True)

            # Mask away data points that are not in the category.
            sig_df_masked = sig_df[sig_df[category_var] == category]
            bkg_dfs_masked = [bkg_df[bkg_df[category_var] == category] for bkg_df in bkg_dfs]
            masked_bkg_weights = [masked_df['weight'] for masked_df in bkg_dfs_masked]

            # Plot the variables for this category.
            for var in full_vars:
                masked_bkg_values = [masked_df[var] for masked_df in bkg_dfs_masked]
                plot_var_dist(
                    sig_df_masked[var],
                    masked_bkg_values,
                    sig_df_masked['weight'],
                    masked_bkg_weights,
                    my_sig_label,
                    bkg_labels,
                    var,
                    cat_out_dir,
                )

    # Make plots for all categories combined.
    out_dir = f"{plots_dir}/{my_sig_label}/all_categories"
    os.makedirs(out_dir, exist_ok=True)

    # Plot the variables.
    for var in full_vars:
        bkg_values = [bkg_df[var] for bkg_df in bkg_dfs]
        plot_var_dist(
            sig_df[var],
            bkg_values,
            sig_df['weight'],
            bkg_weights,
            my_sig_label,
            bkg_labels,
            var,
            out_dir,
        )


Loading Signal Variables...
Signal Variables Loaded!
Loading Signal Variables...
Signal Variables Loaded!
