In [None]:
import os, sys, importlib
# sys.path.append('../')

import nsbi_common_utils
from nsbi_common_utils import plotting, training, inference
import glob
import numpy as np

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
from tensorflow.keras.optimizers import Nadam
import mplhep as hep
import pickle
import matplotlib.pyplot as plt
import yaml

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

hep.style.use(hep.style.ATLAS)


In [None]:
# Initialize a skeleton workspace spec
spec = {
    "channels": [],
    "measurements": [],
    "observations": [],
    "version": [],
}

In [None]:
with open("config.yml", "r") as f:
    config = yaml.safe_load(f)

In [None]:
# path prefix for general save directory
path_prefix = config['path_prefix']

# sub-path for saving cached data used between modules
path_saved_data = config['path_saved_data']
saved_data = f'{path_prefix}{path_saved_data}'

# Get the dictionary of labels to processes
labels_dict = config["labels_dict"]

# Signal processes in the model
signal_processeses = config["signal_processes"]

# Background processes in the model
background_processes = config["background_processes"]

mix_model_hypotheses = config["mix_model_hypotheses"]
ref_processes = config["ref_processes"]

all_process = signal_processeses + background_processes

channels_binned = config["channels_binned"]
channels_unbinned = config["channels_unbinned"]

print(channels_binned)
print(channels_unbinned)

In [None]:
# Load the nominal dataset for evaluation
dataset = pd.read_hdf(f"{saved_data}dataset_preselected_nominal_SR.h5", key="dataset", mode='r')

mask_processes = {}
for process_type in all_process:
    mask_processes[process_type] = dataset.type==process_type

# Mask that extracts the expected Asimov dataset
process_asimov = all_process

mask_Asimov = np.logical_or.reduce([mask_processes[process_type] for process_type in process_asimov])
weights_Asimov = np.array(dataset[mask_Asimov].weights.ravel())

In [None]:
path_to_workspace_data_nominal = f'{path_prefix}output_training_nominal/'

In [None]:
path_to_Asimov_weights = f"{path_to_workspace_data_nominal}/weights_nominal_Asimov.npy"
np.save(path_to_Asimov_weights, weights_Asimov)

In [None]:
spec_channels_list = []

spec_channels_list.append(
    "name": "SR",
    "type": "unbinned",
    "weights": path_to_Asimov_weights,
    "samples": []
)

In [30]:
# spec["channels"].append(
#     {
#         "name": "SR",
#         "type": "unbinned",
#         "weights": path_to_Asimov_weights,
#         "samples": []
#     }
# )

In [31]:
spec

{'channels': [{'name': 'SR',
   'type': 'unbinned',
   'weights': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal//weights_nominal_Asimov.npy',
   'samples': []}],
 'measurements': [],
 'observations': [],
 'version': []}

In [33]:
for process in all_process:
    if process in mix_model_hypotheses:
        path_to_ratio = f'{path_to_workspace_data_nominal}output_ratios_{process_type}/ratio_{process_type}.npy'
        spec["channels"][0]["samples"].append(
            {
                "name": process,
                "data": path_to_ratio,
                "modifiers":[
                    {
                        "data": None,
                        "name": f"mu_{process}",
                        "type": "normfactor",   
                    },
                ]
            }
        )
    else:
        # The case where a sample is used as reference and thus has density ratio = 1 by default
        spec["channels"][0]["samples"].append(
            {
                "name": process,
                "data": "",
                "modifiers":[
                    {
                        "data": None,
                        "name": f"mu_{process}",
                        "type": "normfactor",   
                    },
                ]
            }
        )
    

In [36]:
spec

{'channels': [{'name': 'SR',
   'type': 'unbinned',
   'weights': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal//weights_nominal_Asimov.npy',
   'samples': [{'name': 'htautau',
     'data': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal/output_ratios_ztautau/ratio_ztautau.npy',
     'modifiers': [{'data': None,
       'name': 'mu_htautau',
       'type': 'normfactor'}]},
    {'name': 'ttbar',
     'data': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal/output_ratios_ztautau/ratio_ztautau.npy',
     'modifiers': [{'data': None, 'name': 'mu_ttbar', 'type': 'normfactor'}]},
    {'name': 'ztautau',
     'data': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal/output_ratios_ztautau/ratio_ztautau.npy',
     'modifiers': [{'data': None,
       'name': 'mu_ztautau',
       'type': 'normfactor'}]}]}],
 'measurem

In [None]:
# Get the saved density ratios
ratios = {}
ratios['SR'] = {}

for process_type in mix_model_hypotheses:

    path_to_ratio = f'{top_path}output_ratios_{process_type}/ratio_{process_type}.npy'
    ratios['SR'][process_type] = jnp.array(np.load(path_to_ratio)[mask_Asimov])

for process in all_process:
    if process not in mix_model_hypotheses:

        ratios['SR'][process] = jnp.array(np.ones_like(ratios['SR'][mix_model_hypotheses[0]]))


In [None]:
spec = {
    "channels": [
        {
            "name": "SR",
            "type": "unbinned",
            "weights": path_to_Asimov_weights,
            "samples": [
                {
                    "data": "/path/to/data/data.npy",
                    "modifiers":[
                        "data": None,
                        "name": "mu_higgs",
                        "type": "normfactor"
                    ],
                    "name": "Signal"
                },
                {
                    "data": "/path/to/data/data.npy",
                    "weights": "/path/to/weights/weights.npy",
                    "modifiers": [
                        "data": None,
                        "name": "mu_ztautau",
                        "type": "normfactor"
                    ],
                    "name": "Background",
                },
            ]
        },
        {
            "name": "SR_binned",
            "type": "binned",
            "samples": [
                {
                    "data": [],
                    "modifiers": [],
                    "name": "Signal"
                },
                {
                    "data": [],
                    "modifiers": [],
                    "name": "Background"
                }
            ]
        }
    ],
    "measurements": [],
    "observations":[],
    "version": "1.0.0"
}