In [3]:
import os, sys, importlib
# sys.path.append('../')

import nsbi_common_utils
from nsbi_common_utils import plotting, training, inference
import glob
import numpy as np

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
from tensorflow.keras.optimizers import Nadam
import mplhep as hep
import pickle
import matplotlib.pyplot as plt
import yaml

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

hep.style.use(hep.style.ATLAS)


In [15]:
# Initialize a skeleton workspace spec
spec = {
    "channels": [],
    "measurements": [],
    "observations": [],
    "version": [],
}

In [16]:
with open("config.yml", "r") as f:
    config = yaml.safe_load(f)

In [17]:
# path prefix for general save directory
path_prefix = config['path_prefix']

# sub-path for saving cached data used between modules
path_saved_data = config['path_saved_data']
saved_data = f'{path_prefix}{path_saved_data}'

# Get the dictionary of labels to processes
labels_dict = config["labels_dict"]

# Signal processes in the model
signal_processeses = config["signal_processes"]

# Background processes in the model
background_processes = config["background_processes"]

mix_model_hypotheses = config["mix_model_hypotheses"]
ref_processes = config["ref_processes"]

all_process = signal_processeses + background_processes

channels_binned = config["channels_binned"]
channels_unbinned = config["channels_unbinned"]

print(channels_binned)
print(channels_unbinned)

['SR_binned', 'CR']
['SR']


In [18]:
# Load the nominal dataset for evaluation
dataset = pd.read_hdf(f"{saved_data}dataset_preselected_nominal_SR.h5", key="dataset", mode='r')

mask_processes = {}
for process_type in all_process:
    mask_processes[process_type] = dataset.type==process_type

# Mask that extracts the expected Asimov dataset
process_asimov = all_process

mask_Asimov = np.logical_or.reduce([mask_processes[process_type] for process_type in process_asimov])
weights_Asimov = np.array(dataset[mask_Asimov].weights.ravel())

  weights_Asimov = np.array(dataset[mask_Asimov].weights.ravel())


In [19]:
path_to_workspace_data_nominal = f'{path_prefix}output_training_nominal/'

In [20]:
path_to_Asimov_weights = f"{path_to_workspace_data_nominal}/weights_nominal_Asimov.npy"
np.save(path_to_Asimov_weights, weights_Asimov)

In [21]:
spec["channels"].append(
    {
        "name": "SR",
        "type": "unbinned",
        "weights": path_to_Asimov_weights,
        "samples": []
    }
)

In [22]:
spec

{'channels': [{'name': 'SR',
   'type': 'unbinned',
   'weights': '/home/jaySandesara/NSBI_workflow_tutorial/FAIR_universe_Higgs_tautau/output_training_nominal//weights_nominal_Asimov.npy',
   'samples': []}],
 'measurements': [],
 'observations': [],
 'version': []}

In [12]:
path_to_dict_systs = f"{saved_data}dict_systs.npy"

# Check if user has provided uncertainty NPs
if "dict_systs" in config:
    dict_systs = config["dict_systs"]
    # Load the SR yield variations
    with open(f"{saved_data}yield_SR_variations.pkl", "rb") as fp:
        nu_var_SR = pickle.load(fp)
    # Save the Control Region variation histogram
    with open(f"{saved_data}hist_binned_variations.pkl", "rb") as fp:
        hist_variations = pickle.load(fp)
else:
    dict_systs = {}

# Full list of systematics
list_syst = [key for key in dict_systs]

print(list_syst)

['TES', 'JES']


In [None]:
spec = {
    "channels": [
        {
            "name": "SR",
            "type": "unbinned",
            "weights": path_to_Asimov_weights,
            "samples": [
                {
                    "data": "/path/to/data/data.npy",
                    "modifiers":[
                        "data": None,
                        "name": "mu_higgs",
                        "type": "normfactor"
                    ],
                    "name": "Signal"
                },
                {
                    "data": "/path/to/data/data.npy",
                    "weights": "/path/to/weights/weights.npy",
                    "modifiers": [
                        "data": None,
                        "name": "mu_ztautau",
                        "type": "normfactor"
                    ],
                    "name": "Background",
                },
            ]
        },
        {
            "name": "SR_binned",
            "type": "binned",
            "samples": [
                {
                    "data": [],
                    "modifiers": [],
                    "name": "Signal"
                },
                {
                    "data": [],
                    "modifiers": [],
                    "name": "Background"
                }
            ]
        }
    ],
    "measurements": [],
    "observations":[],
    "version": "1.0.0"
}