In [1]:
import uproot
import awkward as ak
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import pandas as pd
import mplhep as hep
plt.style.use(hep.style.ROOT)

from cfg.hnl_mva_tools import read_json_file
from data_tools.load_data import read_files_and_open_trees, filter_trees
from plot_tools.plot_vars_dist_tools import load_sig_data, load_bkg_data, plot_var_dist_one_sig, plot_var_dist_more_sig

In [2]:
# Input configuration files and the directory that receives every plot.
ntuples_json = "cfg/ntuples.json"
vars_json = "cfg/vars_new.json"
plots_dir = "../vars_dist_plots"

# Open the signal/background trees described by the two JSON configs.
(
    sig_trees,
    bkg_trees,
    good_vars,
    sig_labels,
    bkg_labels
) = read_files_and_open_trees(ntuples_json, vars_json)

# Parse the variables config once and pull every needed key from the same
# result, instead of re-reading and re-parsing the file for each key.
vars_cfg = read_json_file(vars_json)
full_vars = vars_cfg["vars"]
training_vars = vars_cfg["training_vars"]
scale_factor_vars = vars_cfg["scale_factors"]

# Name of the event-weight entry, taken from the ntuples config.
weight_name = read_json_file(ntuples_json)["weight_name"]



#### VARS NAME DICT

In [3]:
# Maps each variable name to the mathtext label used for it on the plots.
# Labels are raw strings so that LaTeX commands such as \mu, \pi and \Delta
# are not parsed as (invalid) Python escape sequences.
# NOTE(review): the muon tagged "Hnl" is labelled \mu_{H} in some entries and
# \mu_{N} in others -- confirm which subscript is intended and unify.
vars_name_dict = {
    "C_Ds_pt": r"$p_T(D_s)$",
    "C_Ds_vertex_cos2D": r"$D_s$ vertex cos(2D)",
    "C_Ds_vertex_prob": r"$D_s$ vertex prob",
    "C_Hnl_vertex_2DSig_BS": r"$L_{xy} / σ$",
    "C_Hnl_vertex_cos2D": r"$N$ vertex cos(2D)",
    "C_Hnl_vertex_cos3D": r"$N$ vertex cos(3D)",
    "C_Hnl_vertex_prob": r"$N$ vertex prob",
    "C_mu_Ds_BS_ips_xy": r"$\mu_{D}$ IPS xy",
    "C_mu_Ds_pt": r"$p_T(\mu_{D})$",
    "C_mu_Ds_nValidTrackerHits": r"$\mu_{D}$ tracker hits",
    "C_mu_Ds_nValidPixelHits": r"$\mu_{D}$ pixel hits",
    "C_mu_Ds_tkIso_R03": r"$\mu_{D}$ isolation",
    "C_mu_Hnl_BS_ips_xy": r"$\mu_{H}$ IPS xy",
    "C_mu_Hnl_pt": r"$p_T(\mu_{H})$",
    "C_mu_Hnl_nValidTrackerHits": r"$\mu_{N}$ tracker hits",
    "C_mu_Hnl_nValidPixelHits": r"$\mu_{N}$ pixel hits",
    "C_mu_Hnl_tkIso_R03": r"$\mu_{N}$ isolation",
    # Previously shared the "IPS xy" label with C_pi_BS_ips_xy, which made the
    # two plots indistinguishable; the key name says "ip", not "ips".
    "C_pi_BS_ip_xy": r"$\pi$ IP xy",
    "C_pi_BS_ips_xy": r"$\pi$ IPS xy",
    "C_pi_pt": r"$p_T(\pi)$",
    "C_pi_nValidTrackerHits": r"$\pi$ tracker hits",
    "C_pi_nValidPixelHits": r"$\pi$ pixel hits",
    "C_mu1mu2_dr": r"$\Delta R (\mu_{H}, \mu_{D})$",
    "C_mu2pi_dr": r"$\Delta R (\mu_{D}, \pi)$",
    "C_pass_gen_matching": "Pass gen matching",
    "C_mu_Hnl_charge": r"$\mu_{N}$ charge",
    "C_mu_Ds_charge": r"$\mu_{D}$ charge",
}





In [4]:
# Load the background samples and their weights via load_bkg_data.
backgrounds, backgrounds_weight = load_bkg_data(
    bkg_trees, full_vars, weight_name, scale_factor_vars
)

# Column order for every background frame is taken from the first sample.
bkg_keys = list(backgrounds[0].keys())

# Build one DataFrame per background sample and attach its weights as an
# extra 'weight' column.
bkg_dfs = []
for sample_idx, sample in enumerate(backgrounds):
    sample_df = pd.DataFrame(sample, columns=bkg_keys)
    sample_df['weight'] = backgrounds_weight[sample_idx]
    bkg_dfs.append(sample_df)

#### CORRELATION MATRIX

In [5]:
# # FILTER TREES
# mass_list = ["mN1p0","mN1p5","mN1p8"]
# ctau_list = ["ctau10"]
# my_sig_trees, my_sig_labels = filter_trees(
#     sig_trees, sig_labels, mass_list=mass_list, ctau_list=ctau_list
# )

In [6]:
# # remove C_category from the list of variables
# # corr_vars = bkg_keys - ["C_category"]
# corr_vars = [var for var in bkg_keys if var != "C_category"]
# true_vars_names = [vars_name_dict[var] for var in corr_vars]
# for my_sig_tree, my_sig_label in zip(my_sig_trees, my_sig_labels):
#     sig, sig_weight = load_sig_data(my_sig_tree, full_vars, scale_factor_vars)
#     sig_keys = list(sig.keys())
#     sig_df = pd.DataFrame(sig, columns=sig_keys)
#     sig_df = sig_df.drop(columns=["C_category"])

#     sig_corr_matrix = sig_df.corr() * 100

#     fig, ax = plt.subplots(figsize=(15, 10))

#     plt.subplots_adjust(
#         left=0.2, right=0.8, bottom=0.2, top=0.8
#     )  # Adjust the subplot parameters
#     ax = sns.heatmap(
#         sig_corr_matrix,
#         annot=True,
#         fmt=".0f",
#         cmap="coolwarm",
#         #use vars_name_dict to replace the names
#         xticklabels=true_vars_names,
#         yticklabels=true_vars_names,
#         cbar=False,
#         annot_kws={"size": 14},
#     )
#     plt.title(f"Correlation matrix for {my_sig_label}")
#     ax.set_xticklabels(
#         ax.get_xticklabels(),
#         rotation=45,
#         horizontalalignment="right",
#         fontsize="x-small",
#     )  # Adjust x labels
#     ax.set_yticklabels(ax.get_yticklabels(), fontsize="x-small")  # Adjust y labels
#     plt.savefig(f"{plots_dir}/correlation_matrix_{my_sig_label}.png")
#     plt.close()
#     break

# # DO BACKGROUND
# # concatenate all bkg dataframes
# my_bkg_df = pd.concat(bkg_dfs)
# # make a copy without the weight column
# my_bkg_df_copy = my_bkg_df.copy()
# my_bkg_df_copy = my_bkg_df_copy.drop(columns=["weight", "C_category"])
# bkg_corr_matrix = my_bkg_df.corr() * 100
# plt.subplots_adjust(
#     left=0.2, right=0.8, bottom=0.2, top=0.8
# )  # Adjust the subplot parameters
# sns.heatmap(
#     bkg_corr_matrix,
#     annot=True,
#     fmt=".0f",
#     cmap="coolwarm",
#     xticklabels=true_vars_names,
#     yticklabels=true_vars_names,
#     cbar=False,
# )
# plt.title(f"Correlation matrix for background")
# plt.savefig(f"{plots_dir}/correlation_matrix_bkg.png")
# plt.close()


# # corr_matrix = df.corr() * 100
# #
# #     plt.subplots_adjust(
# #         left=0.2, right=0.8, bottom=0.2, top=0.8
# #     )  # Adjust the subplot parameters
# #     sns.heatmap(
# #         corr_matrix,
# #         annot=True,
# #         fmt=".0f",
# #         cmap="coolwarm",
# #         xticklabels=var_names,
# #         yticklabels=var_names,
# #         cbar=False,
# #     )

#### USE THIS FOR SPECIFIC PLOTS

In [7]:
# Restrict the signal trees and labels with filter_trees, using the mass and
# ctau tags listed here.
mass_list = ["mN1p0", "mN1p5", "mN1p8"]
ctau_list = ["ctau10"]
my_sig_trees, my_sig_labels = filter_trees(
    sig_trees,
    sig_labels,
    mass_list=mass_list,
    ctau_list=ctau_list,
)

In [8]:
#┌─────────────────────────────┐
#│ USE THIS FOR SPECIFIC PLOTS │
#└─────────────────────────────┘
category_list = [1,2,3,4,5,6]
category_var = "C_category"

# Variables to overlay for all selected signals at once.
my_vars = ["C_Hnl_vertex_2DSig_BS"]
my_sig_dfs = []

# Per-category plots are off by default. This flag replaces an unconditional
# `break` that used to disable the category loop; set it to True to produce them.
make_category_plots = False

# Output directory shared by every plot made in this cell.
out_dir = f"{plots_dir}/my_plots"
os.makedirs(out_dir, exist_ok=True)

for my_sig_tree, my_sig_label in zip(my_sig_trees, my_sig_labels):
    sig, sig_weight = load_sig_data(my_sig_tree, full_vars, scale_factor_vars)
    sig_df = pd.DataFrame(sig, columns=list(sig.keys()))
    # Add weights to sig DataFrame
    sig_df['weight'] = sig_weight
    my_sig_dfs.append(sig_df)

    if not make_category_plots:
        continue

    for category in category_list:
        # Build the category directory from the fixed base path; appending to
        # out_dir in place would nest cat_1/cat_2/... deeper on every pass.
        cat_out_dir = f"{out_dir}/cat_{category}"
        os.makedirs(cat_out_dir, exist_ok=True)

        # Mask away data points that are not in the category.
        sig_df_masked = sig_df[sig_df[category_var] == category]
        bkg_dfs_masked = [bkg_df[bkg_df[category_var] == category] for bkg_df in bkg_dfs]

        # Plot the variables with the single-signal plotter that this
        # notebook imports (the previously referenced plot_var_dist is not
        # imported anywhere in the notebook).
        for var in full_vars:
            plot_var_dist_one_sig(
                sig_df_masked[var],
                [bkg_df[var] for bkg_df in bkg_dfs_masked],
                sig_df_masked['weight'],
                [bkg_df['weight'] for bkg_df in bkg_dfs_masked],
                my_sig_label,
                bkg_labels,
                var,
                cat_out_dir,
                category=category,
            )


# Overlay all selected signals against the backgrounds for each requested
# variable. Dedicated names are used for the per-variable background lists so
# the `backgrounds` / `backgrounds_weight` objects returned by load_bkg_data
# are not overwritten with a different kind of object.
for var in my_vars:
    signals = [sig_df[var] for sig_df in my_sig_dfs]
    signals_weight = [sig_df['weight'] for sig_df in my_sig_dfs]
    bkg_values = [bkg_df[var] for bkg_df in bkg_dfs]
    bkg_weights = [bkg_df['weight'] for bkg_df in bkg_dfs]
    plot_var_dist_more_sig(signals, bkg_values, signals_weight, bkg_weights, my_sig_labels, bkg_labels, var, out_dir)

    



Loading Signal Variables...
Signal Variables Loaded!
Loading Signal Variables...
Signal Variables Loaded!
Loading Signal Variables...
Signal Variables Loaded!


#### USE THIS FOR ALL PLOTS

In [9]:
# Restrict the signal trees and labels with filter_trees, using the mass and
# ctau tags listed here.
mass_list = ["mN1p0", "mN1p5"]
ctau_list = ["ctau10"]
my_sig_trees, my_sig_labels = filter_trees(
    sig_trees,
    sig_labels,
    mass_list=mass_list,
    ctau_list=ctau_list,
)

In [10]:
category_list = [1,2,3,4,5,6]
category_var = "C_category"


def _plot_all_vars_one_sig(sig_frame, bkg_frames, sig_label, out_dir, **plot_kwargs):
    """Plot every variable in ``full_vars`` for one signal against the backgrounds.

    Parameters
    ----------
    sig_frame : pandas.DataFrame
        Signal events; must contain each variable plus a 'weight' column.
    bkg_frames : list of pandas.DataFrame
        One frame per background sample, with the same columns.
    sig_label : str
        Signal label forwarded to ``plot_var_dist_one_sig``.
    out_dir : str
        Directory handed to the plotter; created if it does not exist.
    **plot_kwargs
        Extra keyword arguments forwarded unchanged to
        ``plot_var_dist_one_sig`` (e.g. ``category=...``).
    """
    os.makedirs(out_dir, exist_ok=True)
    for var in full_vars:
        plot_var_dist_one_sig(
            sig_frame[var],
            [bkg_frame[var] for bkg_frame in bkg_frames],
            sig_frame['weight'],
            [bkg_frame['weight'] for bkg_frame in bkg_frames],
            sig_label,
            bkg_labels,
            var,
            out_dir,
            **plot_kwargs,
        )


# The per-category background selections do not depend on the signal, so they
# are computed once here rather than once per signal sample.
bkg_dfs_by_category = {
    category: [bkg_df[bkg_df[category_var] == category] for bkg_df in bkg_dfs]
    for category in category_list
}

for my_sig_tree, my_sig_label in zip(my_sig_trees, my_sig_labels):
    sig, sig_weight = load_sig_data(my_sig_tree, full_vars, scale_factor_vars)
    sig_df = pd.DataFrame(sig, columns=list(sig.keys()))
    # Add weights to sig DataFrame
    sig_df['weight'] = sig_weight

    # One set of plots per category, keeping only the events in that category.
    for category in category_list:
        _plot_all_vars_one_sig(
            sig_df[sig_df[category_var] == category],
            bkg_dfs_by_category[category],
            my_sig_label,
            f"{plots_dir}/{my_sig_label}/cat_{category}",
            category=category,
        )

    # One set of plots with all categories together; no `category` keyword is
    # passed here, so the plotter falls back to its own default.
    _plot_all_vars_one_sig(
        sig_df,
        bkg_dfs,
        my_sig_label,
        f"{plots_dir}/{my_sig_label}/all_categories",
    )

Loading Signal Variables...
Signal Variables Loaded!


Loading Signal Variables...
Signal Variables Loaded!
