In [1]:
import json
import uproot 
import numpy as np
import boost_histogram as bh
import glob
import copy
from tqdm import tqdm

In [3]:
# Root directory of the run; the cells below expect it to contain one
# mH*_mA* sub-directory per mass point, each with a backgrounds.json file.
output_direc = "/vols/cms/emc21/FCC/FCC-Study/runs/e365_full_run_fixedLumis/run3/combine_grid_space_2.5/combine"

In [4]:
# Collect every backgrounds.json found one level below output_direc, in
# sub-directories whose names match mH*_mA*. glob returns them unsorted.
background_files = glob.glob(f"{output_direc}/mH*_mA*/backgrounds.json")


In [7]:
# Centre-of-mass-energy tag that appears at the end of every sample name.
# This notebook was previously run with the "ecm240" samples; set ECM = 240
# to rebuild that table instead of keeping a commented-out copy of it.
ECM = 365

# Background group name -> list of sample names that are merged into it.
# The group names become part of the histogram keys written to the ROOT file.
bkg_proc_groups = {
    "ZZ": [f"p8_ee_ZZ_ecm{ECM}"],
    "tautau": [f"wzp6_ee_tautau_ecm{ECM}"],
    "WW": [f"p8_ee_WW_ecm{ECM}"],
    "ee": [f"wzp6_ee_ee_Mee_30_150_ecm{ECM}"],
    "mumu": [f"wzp6_ee_mumu_ecm{ECM}"],
    "Higgs_X": [
        f"wzp6_ee_eeH_ecm{ECM}",
        f"wzp6_ee_mumuH_ecm{ECM}",
        f"wzp6_ee_nunuH_ecm{ECM}",
        f"wzp6_ee_tautauH_ecm{ECM}",
    ],
}

def getGroup(proc):
    """Return the name of the background group whose sample list contains ``proc``.

    The lookup uses the module-level ``bkg_proc_groups`` mapping
    (group name -> list of sample names) and returns the first match in
    the mapping's iteration order.

    Raises:
        ValueError: if ``proc`` is not listed under any group.
    """
    not_found = object()
    matching_groups = (
        name for name, members in bkg_proc_groups.items() if proc in members
    )
    found = next(matching_groups, not_found)
    if found is not_found:
        raise ValueError(f"Process {proc} not found in any group")
    return found


def makeEmptyHistogramDict(n_bins=24, groups=None):
    """Build a zero-filled histogram container with one entry per background group.

    Args:
        n_bins: Number of bins in every histogram. The default of 24 is the
            number of bins defined by the ``np.linspace(0.9, 1, 25)`` edges
            used by the conversion cells of this notebook.
        groups: Iterable of group names to create entries for. When omitted,
            the keys of the module-level ``bkg_proc_groups`` are used.

    Returns:
        dict mapping each group name to a two-element list
        ``[contents, sumw2]``. Both elements are independent float arrays of
        ``n_bins`` zeros, so accumulating into one never affects another.
    """
    if groups is None:
        groups = bkg_proc_groups.keys()

    # A fresh pair of arrays per group: sharing one array between entries
    # would make the in-place "+=" accumulation leak across groups.
    return {group: [np.zeros(n_bins), np.zeros(n_bins)] for group in groups}



In [8]:
# Bin edges of every histogram written below (24 bins on [0.9, 1]).
# They must match the binning of the arrays stored in backgrounds.json.
bins = np.linspace(0.9, 1, 25)
n_bins = len(bins) - 1

# Channels that get their own set of histograms; the part of each JSON key
# after the ";" must be one of these.
channels = ("Electron", "Muon")

for file in tqdm(sorted(background_files)):
    with open(file) as f:
        hists_dict = json.load(f)

    # channel -> group -> [bin contents, sum of squared weights],
    # with the samples of each group summed together.
    root_hist_dict = {channel: makeEmptyHistogramDict() for channel in channels}

    # Each key is "<sample name>;<channel>" and each value is a
    # (contents, sumw2) pair of per-bin lists.
    for proc, (hist, sumw2) in hists_dict.items():
        proc_name, process = proc.split(";")
        group = getGroup(proc_name)

        root_hist_dict[process][group][0] += np.array(hist)
        root_hist_dict[process][group][1] += np.array(sumw2)

    # Also add all of the histograms together, to make data_obs.
    # The accumulators are sized from the binning rather than from whatever
    # `hist` happened to be left over from the loop above, so this also works
    # when the JSON file contains no entries.
    for process in channels:
        data_obs = [np.zeros(n_bins), np.zeros(n_bins)]
        for hists in root_hist_dict[process].values():
            data_obs[0] += hists[0]
            data_obs[1] += hists[1]
        # Inserted only after the sum, so data_obs never adds itself.
        root_hist_dict[process]['data_obs'] = data_obs

    # Directory of this mass point (everything before the final "/").
    mass_point_loc = file.rsplit("/", 1)[0]

    # Now that I have the histograms, I want to save them to a root file
    with uproot.recreate(f"{mass_point_loc}/backgrounds.root") as f:
        for process, hist_dict in root_hist_dict.items():
            for group, (hist, sumw2) in hist_dict.items():
                # If hist is all zeros, then just add a tiny value
                # (presumably so downstream tools never see an empty
                # template -- confirm). A new array is created so the
                # accumulated totals in root_hist_dict stay untouched.
                if np.all(hist == 0):
                    hist = hist + 1e-7

                # Weight storage keeps (value, variance) per bin; stacking on
                # the last axis fills both fields in one assignment.
                root_hist = bh.Histogram(bh.axis.Variable(bins),
                                         storage=bh.storage.Weight())
                root_hist[...] = np.stack([hist, sumw2], axis=-1)

                f[f"{group}_{process}"] = root_hist



  0%|          | 7/1971 [00:00<01:47, 18.29it/s]

100%|██████████| 1971/1971 [01:43<00:00, 18.98it/s]


In [9]:
# Scratch check of the modulo operator; no other cell shown here uses the result.
10 % 5

0

In [18]:
import awkward as ak
import pickle

# Event record array for one mass point of the e240 run.
evs = ak.from_parquet("/vols/cms/emc21/FCC/FCC-Study/runs/e240_full_run/run25/data/test/awkward/mH50_mA110.parquet")

# NOTE: unpickling can execute arbitrary code, so only load scaler files that
# were produced by a trusted run. The context manager closes the file handle,
# which a bare `pickle.load(open(...))` leaves open.
with open("/vols/cms/emc21/FCC/FCC-Study/runs/e240_full_run/run25/scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)

In [22]:
# Columns to pull out of the event array, in the order they are selected.
# The order is significant: it fixes the field order of the extracted array.
branches = [
    # Zcand_* columns
    "Zcand_m", "Zcand_pt", "Zcand_pz", "Zcand_p",
    "Zcand_povere", "Zcand_e", "Zcand_costheta", "Zcand_recoil_m",
    # lep1_* columns
    "lep1_pt", "lep1_eta", "lep1_e", "lep1_charge",
    # lep2_* columns
    "lep2_pt", "lep2_eta", "lep2_e", "lep2_charge",
    # lepton-pair and angular columns
    "lep_chargeprod", "cosDphiLep", "cosThetaStar", "cosThetaR",
    # jet multiplicity
    "n_jets",
    # MET_* columns
    "MET_e", "MET_pt", "MET_eta", "MET_phi",
    # lepton multiplicities
    "n_muons", "n_electrons",
]

# Keep only the columns listed in `branches`.
features = evs[branches]
# Convert to NumPy and display as the cell result. This shows the whole
# array repr; slice it (e.g. [:5]) if only a preview is needed.
ak.to_numpy(features)

array([(19.668781  , 17.123974 ,   4.7956343, 17.782816, 0.67064893, 26.515835,  0.269678  , 212.74223, 16.74917  ,  0.5454145, 19.30308 , -1.,  5.383241 , -0.8025567 ,  7.2127566,  1., -1., -0.0902985 ,  0.6310662 ,  0.6310662 , 2., 17.782818, 17.123974 , -0.2765165 , -0.77199376, 2.0000000e+00, -1.7285675e-08),
       (33.26478   , 36.567467 ,  -3.15905  , 36.703667, 0.7409661 , 49.53488 , -0.0860691 , 186.89516, 26.739408 ,  0.3524456, 28.417427, -1., 16.811598 , -0.70125806, 21.117456 ,  1., -1.,  0.37767473,  0.68878454, -0.68878454, 1., 36.703667, 36.567467 ,  0.08628252,  1.3059946 , 1.2595877e-08,  2.0000000e+00),
       (41.854584  , 29.414341 ,  47.43231  , 55.81243 , 0.8000326 , 69.762695,  0.849852  , 160.8282 , 38.957924 ,  0.8013589, 52.150753,  1., 12.136463 ,  0.9173938 , 17.611938 , -1., -1., -0.8458025 ,  0.05136126,  0.05136126, 2., 56.96219 , 29.81555  , -1.2636619 ,  1.3813205 , 1.2595877e-08,  2.0000000e+00),
       ...,
       ( 0.33173603,  7.9121943, -12.105542

In [30]:
# Print, for every distinct n_muons value, the fraction of events having
# exactly that value.
# NOTE(review): the recorded output lists values such as 1.26e-08 next to
# 1.0 and 2.0, so n_muons appears to be stored as floats with round-off
# rather than as integers -- confirm before relying on exact equality here.
for muon_count in np.unique(evs.n_muons):
    is_selected = (evs.n_muons == muon_count)
    n_selected = ak.sum(is_selected)
    n_events = ak.count(is_selected)
    eff = n_selected / n_events

    print(f"Efficiency for {muon_count} muons: {eff}")

Efficiency for 1.2595877230126007e-08 muons: 0.49335116385975175
Efficiency for 1.0 muons: 1.6716118517280287e-05
Efficiency for 2.0 muons: 0.506632120021731


In [28]:
# Distinct values present in the n_muons column.
np.unique(evs.n_muons)

In [29]:
# Single-file version of the JSON -> ROOT conversion, run on the first entry
# of background_files only.
# NOTE(review): this looks like an older variant of the batch conversion
# loop. It passes the JSON keys to getGroup unsplit, writes one histogram per
# group with no channel suffix and no data_obs, and targets the same
# backgrounds.root path as the batch loop. Its recorded output lists ecm240
# sample names. Confirm it is still wanted before re-running it.
bins = np.linspace(0.9, 1, 25)

file = background_files[0]
with open(file) as f:
    hists_dict = json.load(f)

# group name -> [bin contents, sum of squared weights], samples combined
root_hist_dict = makeEmptyHistogramDict()

for proc in hists_dict:
    hist, sumw2 = hists_dict[proc]

    group = getGroup(proc)
    print(proc, group)

    group_totals = root_hist_dict[group]
    group_totals[0] += np.array(hist)
    group_totals[1] += np.array(sumw2)

# Directory that holds `file` (everything before the final "/").
mass_point_loc = file.rsplit("/", 1)[0]

# Write one weighted histogram per group into backgrounds.root.
with uproot.recreate(f"{mass_point_loc}/backgrounds.root") as f:
    for group in root_hist_dict:
        hist, sumw2 = root_hist_dict[group]

        # Pad a completely empty histogram with a tiny value. This is done
        # in place, so the array stored in root_hist_dict changes as well.
        if not np.any(hist != 0):
            hist += 1e-7

        weighted_axis = bh.axis.Variable(bins)
        root_hist = bh.Histogram(weighted_axis, storage=bh.storage.Weight())
        root_hist[...] = np.stack([hist, sumw2], axis=-1)

        f[group] = root_hist


p8_ee_WW_ecm240 WW
p8_ee_ZZ_ecm240 ZZ
wzp6_ee_eeH_ecm240 Higgs_X
wzp6_ee_ee_Mee_30_150_ecm240 ee
wzp6_ee_mumuH_ecm240 Higgs_X
wzp6_ee_mumu_ecm240 mumu
wzp6_ee_nunuH_ecm240 Higgs_X
wzp6_ee_tautauH_ecm240 Higgs_X
wzp6_ee_tautau_ecm240 tautau


In [24]:
# The bare `file` is not the last expression of the cell, so it displays nothing.
file
# Directory containing `file`: the path with its final "/"-separated component dropped.
mass_point_loc = "/".join(file.split("/")[:-1])
mass_point_loc

'/vols/cms/emc21/FCC/FCC-Study/runs/e240_full_run/run25/combine_grid_space_2.5/combine/mH90.0_mA120.0'

In [22]:
# Display the combined histograms; in the recorded output each group maps to
# a pair of arrays (filled elsewhere as bin contents and sumw2).
root_hist_dict

{'ZZ': [array([ 25.265625,  38.328125,  33.984375,  28.734375,  31.359375,
          25.25    ,  33.109375,  50.515625,  54.015625,  61.84375 ,
          67.953125,  77.515625,  90.609375, 100.171875, 138.5     ,
         163.75    , 185.5625  , 241.28125 , 331.03125 , 388.5     ,
         442.515625, 601.046875, 593.21875 , 206.453125]),
  array([ 21.96875 ,  33.359375,  29.546875,  25.015625,  27.265625,
          22.      ,  28.78125 ,  43.96875 ,  46.96875 ,  53.8125  ,
          59.09375 ,  67.46875 ,  78.78125 ,  87.15625 , 120.5     ,
         142.46875 , 161.40625 , 209.921875, 287.96875 , 337.984375,
         384.96875 , 522.890625, 516.078125, 179.59375 ])],
 'tautau': [array([304.375 , 294.625 , 291.5   , 269.125 , 265.8125, 281.875 ,
         246.625 , 256.3125, 278.6875, 272.1875, 166.5625, 163.4375,
         102.4375,  44.875 ,  44.8125,  38.375 ,  22.5   ,   6.4375,
          25.5625,   9.625 ,  19.1875,  12.8125,   6.375 ,   3.25  ]),
  array([979.25, 948.75, 937.75, 86