In [None]:
# Imports
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import uproot
import particle

# Imports from this project
sys.path.insert(0, "..")
from utils import paths
from utils.input_output import load_data_from_root, load_preprocessed_data, load_feature_properties
from utils.histograms import find_good_binning, get_hist, calc_pull
from utils.merge_pdfs import merge_pdfs

from hist_features_by_label import hist_feature_by_label


# Check if the SS classified data is correct

In [None]:
# Load the SS-classified sample from the project's configured file.
# NOTE(review): the very large entry cap presumably means "read everything" — confirm in load_preprocessed_data.
df = load_preprocessed_data(
    input_file=paths.ss_classified_data_file,
    N_entries_max=1_000_000_000,
)

In [None]:
# Show the loaded frame as the cell output for a quick visual check.
df

In [None]:
# List the column labels available in the loaded frame.
df.columns

In [None]:
# First 15 rows of the SS flag next to the SS probability.
df.loc[:, ["Tr_is_SS", "Tr_ProbSS"]].head(15)

In [None]:
# SS flag and SS probability, ordered by ascending probability.
df.loc[:, ["Tr_is_SS", "Tr_ProbSS"]].sort_values("Tr_ProbSS")

# Check for Bd/Bs separation based on the feature importance

In [None]:
# Load the preprocessed data using the loader's default arguments.
df_data = load_preprocessed_data()

In [None]:
# Load the feature properties; later cells index this by feature / label name.
feature_props = load_feature_properties()

In [None]:
# Feature to histogram and the label used to split it, with their stored properties.
fkey, lkey = "Tr_T_x", "B_is_strange"
fprops, lprops = feature_props[fkey], feature_props[lkey]

# Histogram settings.
n_bins = 200
lower_quantile, higher_quantile = 0.01, 0.99

In [None]:
# Histogram of the chosen feature split by label, with the "is SS" selection added as a cut.
fig = hist_feature_by_label(
    df_data,
    fkey,
    fprops,
    lkey,
    lprops,
    lower_quantile=lower_quantile,
    higher_quantile=higher_quantile,
    add_cut=True,
    cut_query="Tr_is_SS==1",
    cut_label="is SS",
)

plt.show()

In [None]:

# Repeat the histogram with hand-set quantile entries, once for the feature
# as stored and once with its sign flipped.
fkey = "Tr_T_x"
lkey = "B_is_strange"
lprops = feature_props[lkey]

n_bins = 200
lower_quantile = 0.01
higher_quantile = 0.99

# Work on a copy: writing the overrides into feature_props[fkey] itself would
# leak into every other cell that reads the shared feature properties and make
# the notebook's results depend on cell execution order.
fprops = feature_props[fkey].copy()
# NOTE(review): these entries are presumably what hist_feature_by_label uses
# as the plotted range for the given quantiles — confirm in that function.
fprops[f"quantile_{lower_quantile}"] = 0.0001
fprops[f"quantile_{higher_quantile}"] = 200

fig = hist_feature_by_label(
    df_data,
    fkey,
    fprops,
    lkey,
    lprops,
    higher_quantile=higher_quantile,
    lower_quantile=lower_quantile,
    add_cut=True,
    cut_query="Tr_is_SS==1",
    cut_label="is SS",
)

plt.show()

# Same plot on a frame whose feature column is multiplied by -1. The column is
# taken from fkey so this stays in sync if the feature above is changed;
# assign() returns a new frame and leaves df_data untouched.
df_data_mirrored = df_data.assign(**{fkey: df_data[fkey] * -1})

fig = hist_feature_by_label(
    df_data_mirrored,
    fkey,
    fprops,
    lkey,
    lprops,
    higher_quantile=higher_quantile,
    lower_quantile=lower_quantile,
    add_cut=True,
    cut_query="Tr_is_SS==1",
    cut_label="is SS",
)

plt.show()