In [3]:
import gzip
import json

import correctionlib
import correctionlib.convert
import numpy as np
import requests
import rich
import hist

# Electron scale-factor sources, nested as
#   era -> object-WP group -> electron working point -> SF type -> "1-1" -> list of strings.
# The last string of every list is the path of the JSON file holding the SFs;
# the "tkSF" path is the one that gets resolved against path_jsonpog further
# down in this notebook (only the part after "POG" is kept).
# NOTE(review): the meaning of the "1-1" key and of the leading list entries is
# not established anywhere in this notebook - presumably a run-range selector
# followed by era / correction / working-point labels; confirm with the
# producer of this table.
ElectronWP = {
    "Full2022EEv12": {
        "TightObjWP": {
            "mvaWinter22V2Iso_WP90": {
                "tkSF": {
                    "1-1": [
                        "2022Re-recoE+PromptFG",
                        "Electron-ID-SF",
                        "data/jsonpog-integration/POG/EGM/2022_Summer22EE/electron.json.gz",
                    ]
                },
                "wpSF": {
                    "1-1": [
                        "2022Re-recoE+PromptFG",
                        "Electron-ID-SF",
                        "passingMVA90_HWW",
                        "data/scale_factor/Full2022EEv12/electron.json",
                    ],
                },
            }
        }
    }
}
# Muon scale-factor sources, nested as
#   era -> object-WP group -> muon working point -> SF type -> "1-1" -> [name, path].
# The first string of each list equals the correction name that this notebook
# later looks up in the muon JSON; the second is the path of that JSON file.
# NOTE(review): the notebook hard-codes those names and the file name again
# further down instead of reading them from this table - keep both in sync.
MuonWP = {
    "Full2022EEv12": {
        "TightObjWP": {
            "cut_Tight_HWW": {
                "idSF": {
                    "1-1": [
                        "NUM_TightID_HWW_DEN_TrackerMuons",
                        "data/scale_factor/Full2022EEv12/muonSF_latinos_HWW.json",
                    ],
                },
                "isoSF": {
                    "1-1": [
                        "NUM_TightPFIso_DEN_TightID_HWW",
                        "data/scale_factor/Full2022EEv12/muonSF_latinos_HWW.json",
                    ],
                },
            }
        }
    }
}

# Root of the jsonpog-integration "POG" directory the electron JSON is read from.
# The previous first assignment pointed at a checkout in one developer's home
# directory and was immediately overwritten by this value, so it was dead code;
# to use a local checkout, change this single line instead.
path_jsonpog = "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG"
# Base URL of the latinos scale-factor files.
# NOTE(review): this value is reassigned to the mkShapesRDF location before the
# first download in this notebook, so it looks unused here - confirm and drop.
url_latinos = "https://raw.githubusercontent.com/latinos/LatinoAnalysis/UL_production/NanoGardener/python/data/scale_factor/"

In [4]:
# Era and electron working point used to index the ElectronWP table.
ERA, eleWP = "Full2022EEv12", "mvaWinter22V2Iso_WP90"

In [5]:
def get_cset_electron(corr, wp, tag, return_histo=False):
    """Build a correctionlib correction for one electron scale-factor working point.

    The nominal ("sf"), down ("sfdown") and up ("sfup") tables of ``wp`` are
    read from the parsed JSON ``corr``, stacked along a leading string-category
    axis called "syst" (labels "nominal", "syst_down", "syst_up") and converted
    with ``correctionlib.convert.from_histogram``.

    Parameters
    ----------
    corr : dict
        Parsed correction JSON. Only
        ``corr["corrections"][0]["data"]["content"][0]["value"]["content"]``
        is read, i.e. the first entry of the first correction.
        NOTE(review): presumably that first entry is the era/year category of
        the EGM file - confirm if the file layout changes.
    wp : str
        Key of the working point inside each variation's category content.
    tag : str
        Name given to the histogram before conversion.
    return_histo : bool, optional
        When true, return ``(histogram, correction)`` instead of only the
        correction.

    Raises
    ------
    KeyError
        If one of the variations or ``wp`` is not present. Previously this
        surfaced as an opaque ``TypeError`` from indexing a list with ``None``.
    ValueError
        If the three variations do not share the same inputs and bin edges;
        they are stored in a single histogram, so they must.
    """
    real_content = corr["corrections"][0]["data"]["content"][0]["value"]["content"]
    variations = ["sf", "sfdown", "sfup"]
    syst = ["nominal", "syst_down", "syst_up"]

    content_syst = []
    binning = None  # (inputs, edges) of the first variation, reused for the axes
    for valType in variations:
        sf_ind = find_key(valType, real_content)
        if sf_ind is None:
            raise KeyError(f"variation {valType!r} not found in the correction content")
        sf = real_content[sf_ind]["value"]["content"]

        wp_ind = find_key(wp, sf)
        if wp_ind is None:
            raise KeyError(
                f"working point {wp!r} not found for variation {valType!r}"
            )
        obj = sf[wp_ind]["value"]

        current = (obj["inputs"], obj["edges"])
        if binning is None:
            binning = current
        elif current != binning:
            raise ValueError(
                f"variation {valType!r} of working point {wp!r} uses a different "
                "binning than the nominal one"
            )
        content_syst.append(np.array(obj["content"]))

    # One variable-width axis per input, built once instead of on every iteration.
    inputs, edges_per_axis = binning
    axis = [
        hist.axis.Variable(edges, name=name)
        for edges, name in zip(edges_per_axis, inputs)
    ]
    # The JSON stores each table flat; restore (variation, *bins per axis).
    shape = [ax.edges.shape[0] - 1 for ax in axis]
    content_syst = np.array(content_syst).reshape(len(syst), *shape)

    h = hist.Hist(
        hist.axis.StrCategory(syst, name="syst"),
        *axis,
        hist.storage.Double(),
        data=content_syst,
    )
    h.name = f"{tag}"
    h.label = "out"
    cset = correctionlib.convert.from_histogram(h)

    if return_histo:
        return h, cset
    return cset

In [6]:
def find_key(key: str, l: list, key_name="key"):
    """Return the index of the first item of ``l`` whose ``key_name`` entry equals ``key``.

    Items are indexed with ``item[key_name]``, so an item lacking that entry
    raises before a later match can be found. Returns ``None`` when no item
    matches.
    """
    return next(
        (index for index, item in enumerate(l) if item[key_name] == key),
        None,
    )

def print_keys(l: list):
    """Print the ``"key"`` entry of every item of ``l``, one per line."""
    for item in l:
        print(item["key"])

In [7]:
# Locate the "tkSF" file of the selected electron working point inside the
# jsonpog-integration tree: keep only what follows the last "POG" in the
# configured path and append it to path_jsonpog.
fname = path_jsonpog + (
    list(ElectronWP[ERA]["TightObjWP"][eleWP]["tkSF"].values())[0][-1]
    .rpartition("POG")[2]
)
print(fname)
# gzip.open defaults to binary mode; json decodes the decompressed bytes itself.
with gzip.open(fname) as file:
    corr = json.loads(file.read())

/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/EGM/2022_Summer22EE/electron.json.gz


In [8]:
# Start the list of corrections with one reco SF per key of the EGM file
# (the three keys are named after pt ranges).
csets = []
for wp in ("RecoBelow20", "Reco20to75", "RecoAbove75"):
    csets.append(get_cset_electron(corr, wp, "Electron_RecoSF_" + wp))
    rich.print(csets[-1])

In [9]:
url_latinos = "https://raw.githubusercontent.com/latinos/mkShapesRDF/refs/heads/master/mkShapesRDF/processor/data/scale_factor/Full2022EEv12/"

fname = "electron.json"
# url_latinos already ends with "/", so the joined URL contains "//"; the
# recorded run shows that exact URL being requested, so it is left unchanged.
url = f"{url_latinos}/{fname}"
print(
    "downloading",
    url,
)
# Fetch first and fail loudly on an HTTP error, so that an error page is never
# written to disk and a failed request does not leave an empty file behind.
# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
r.raise_for_status()
with open("test_sf.txt", "w") as file:
    file.write(r.text)

downloading https://raw.githubusercontent.com/latinos/mkShapesRDF/refs/heads/master/mkShapesRDF/processor/data/scale_factor/Full2022EEv12//electron.json


In [10]:
# Parse the electron working-point SF file downloaded in the previous cell.
fname = "test_sf.txt"

with open(fname) as file:
    corr = json.loads(file.read())

In [11]:
# csets is deliberately not reset here: the working-point SF is appended to
# the reco SFs collected earlier.
wp = "passingMVA90_HWW"
csets.append(get_cset_electron(corr, wp, "Electron_WP_SF"))
rich.print(csets[-1])

In [12]:
fname = "muonSF_latinos_HWW.json"
# url_latinos already ends with "/", so the joined URL contains "//"; the
# recorded run shows that exact URL being requested, so it is left unchanged.
url = f"{url_latinos}/{fname}"
print(
    "downloading",
    url,
)
# Fetch first and fail loudly on an HTTP error, so that an error page is never
# written to disk and a failed request does not leave an empty file behind.
# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
r.raise_for_status()
with open("test_sf.json", "w") as file:
    file.write(r.text)

downloading https://raw.githubusercontent.com/latinos/mkShapesRDF/refs/heads/master/mkShapesRDF/processor/data/scale_factor/Full2022EEv12//muonSF_latinos_HWW.json


In [13]:
# Parse the muon SF file downloaded in the previous cell.
fname = "test_sf.json"
with open(fname) as file:
    corr = json.loads(file.read())

In [14]:
import numpy as np

In [15]:
from correctionlib.schemav2 import Category, Binning, Correction


def build_data(dict):
    """Turn a parsed JSON correction node into a validated schema-v2 node.

    Parameters
    ----------
    dict : mapping
        Parsed JSON node with a ``"nodetype"`` entry. The parameter name
        shadows the builtin ``dict`` inside this function; it is kept so that
        existing keyword callers continue to work.

    Returns
    -------
    ``Category`` for ``"category"`` nodes (``key``/``value`` pairs are handed
    to the model as they are), ``Binning`` for ``"binning"`` nodes, and
    ``None`` - after printing the node - for any other node type.

    For binning nodes:
      * numeric infinite edges are replaced by the strings ``"-inf"`` /
        ``"inf"``; every other edge is kept as is;
      * an ``edges`` mapping (uniform binning given as ``n``/``low``/``high``)
        is passed through unchanged. Iterating it, as was done before, turned
        it into the list of its key names;
      * content entries that are not plain numbers are converted recursively.
    """
    # Function-scope import: the parameter shadows the builtin ``dict``, so
    # mappings are recognised through the ABC instead.
    from collections.abc import Mapping

    nodetype = dict["nodetype"]
    if nodetype == "category":
        return Category.model_validate(
            {
                "nodetype": "category",
                "input": dict["input"],
                "content": [
                    {"key": item["key"], "value": item["value"]}
                    for item in dict["content"]
                ],
            }
        )

    if nodetype == "binning":
        edges = dict["edges"]
        if isinstance(edges, Mapping):
            # Uniform binning description: nothing to convert.
            binning = edges
        else:
            binning = []
            for val in edges:
                if isinstance(val, (int, float)) and np.isinf(val):
                    val = "-inf" if val < 0 else "inf"
                binning.append(val)

        content = [
            value if isinstance(value, (int, float)) else build_data(value)
            for value in dict["content"]
        ]

        return Binning.model_validate(
            {
                "nodetype": "binning",
                "input": dict["input"],
                "edges": binning,
                "content": content,
                "flow": dict["flow"],
            }
        )

    # Unsupported node type: report it and fall through to an explicit None,
    # exactly as before (callers see the failure when validating the parent).
    print("Could not parse dict")
    print(dict)
    return None


def build_correction(dict):
    """Validate one parsed JSON correction as a schema-v2 ``Correction``.

    ``name``, ``inputs`` and ``output`` are copied from the input, ``data`` is
    converted with ``build_data`` and ``version`` is always set to 0; any other
    entry of the input is ignored. The parameter name shadows the builtin
    ``dict`` inside this function.
    """
    payload = {"version": 0}
    for field in ("name", "inputs", "output"):
        payload[field] = dict[field]
    payload["data"] = build_data(dict["data"])
    return Correction.model_validate(payload)

In [16]:
# Append the muon ID and isolation corrections, looked up by name in the muon JSON.
for cname in (
    "NUM_TightID_HWW_DEN_TrackerMuons",
    "NUM_TightPFIso_DEN_TightID_HWW",
):
    corr_idx = find_key(cname, corr["corrections"], "name")
    csets.append(build_correction(corr["corrections"][corr_idx]))

In [17]:
# Bundle every correction collected in csets into a single schema-v2
# CorrectionSet (with an empty description) and pretty-print it.
cset = correctionlib.schemav2.CorrectionSet(
    schema_version=2, description="", corrections=csets
)

rich.print(cset)

In [18]:
# inputs
"""
Electron corrections take syst, eta, pt. Eta does not need clipping; pt must satisfy 10.0 <= pt < 200.0.
Muon corrections take eta, pt, syst. Eta must satisfy -2.4 <= eta < 2.4 (clip eta), and pt >= 10.0.
"""

'\nElectron corrections take syst, eta, pt. Eta does not need clipping; pt must satisfy 10.0 <= pt < 200.0.\nMuon corrections take eta, pt, syst. Eta must satisfy -2.4 <= eta < 2.4 (clip eta), and pt >= 10.0.\n'