In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
import time
import argparse
import warnings
import logging
from os.path import join
from collections import defaultdict
sys.path.append('../')
warnings.filterwarnings("ignore")

from coffea import processor, util
import hist
from matplotlib import pyplot as plt
from cycler import cycler
from hist import Hist
from hist.intervals import ratio_uncertainty
from azh_analysis.utils.plotting import plot_data_vs_mc
from azh_analysis.utils.parameters import get_lumis
import warnings
warnings.filterwarnings('ignore')
#hep.style.use(["CMS", "fira", "firamath"])

# ---- Configuration ---------------------------------------------------------
# Signal mass-point tag. It is matched as a plain substring of the signal
# dataset names below.
# NOTE(review): a substring match would also pick up names that merely contain
# these digits -- confirm against the actual dataset names.
mass = "300"
years = ["2018"]  # any subset of: "2016preVFP", "2016postVFP", "2017", "2018"

# The full Run 2 and the combined 2016 selections have dedicated luminosity
# entries and output directories; every other selection sums per-year values.
# Both the luminosity and the output directory are derived from the same two
# flags so that they can never disagree.
is_run2 = set(years) == {"2016preVFP", "2016postVFP", "2017", "2018"}
is_2016 = set(years) == {"2016preVFP", "2016postVFP"}

lumis = get_lumis()
if is_run2:
    lumi = lumis["Run 2"]
elif is_2016:
    lumi = lumis["2016"]
else:
    lumi = sum(lumis[year] for year in years)

# ---- Load histograms -------------------------------------------------------
# Each coffea file is read once per year (not once per variable). For every
# variable the per-dataset histograms are summed, and the result is accumulated
# over the requested years. Accumulators start at 0, exactly like sum() does.
variables = ["m4l", "mll", "mtt", "pt", "met"]
data, data_ss, mc, ggA, bbA = ({var: 0 for var in variables} for _ in range(5))
for year in years:
    data_out = util.load(f"../output_test/data_UL_{year}_OS_ub.coffea")
    data_ss_out = util.load(f'../output_test/data_UL_{year}_SS_ub_relaxed.coffea')
    mc_out = util.load(f"../output_test/MC_UL_{year}_all_OS.coffea")
    signal_out = util.load(f"../output_test/signal_UL_{year}_all_OS.coffea")
    for var in variables:
        data[var] = data[var] + sum(data_out[var].values())
        data_ss[var] = data_ss[var] + sum(data_ss_out[var].values())
        mc[var] = mc[var] + sum(mc_out[var].values())
        # Signal datasets at the chosen mass point are split by name: those
        # containing "GluGlu" go to ggA, all remaining ones go to bbA.
        at_mass = {name: h for name, h in signal_out[var].items() if mass in name}
        ggA[var] = ggA[var] + sum(h for name, h in at_mass.items() if "GluGlu" in name)
        bbA[var] = bbA[var] + sum(h for name, h in at_mass.items() if "GluGlu" not in name)

# LaTeX labels of the four di-tau final states, keyed by category suffix.
cat_labels = {
    'tt': r'$ll\tau_h\tau_h$',
    'et': r'$ll e\tau_h$',
    'mt': r'$ll\mu\tau_h$',
    'em': r'$ll e\mu$',
}

# ---- Output location -------------------------------------------------------
plot = True  # when False the plotting cells pass outfile=None
if is_run2:
    outdir = "../plots/data-mc/all"
elif is_2016:
    outdir = "../plots/data-mc/2016"
elif len(years) == 1:
    outdir = f"../plots/data-mc/{years[0]}"
else:
    # Arbitrary combination of years: join them into one directory name.
    year_str = "_".join(years)
    outdir = f"../plots/data-mc/{year_str}"
print(outdir)
print(mc["m4l"])

In [None]:
########
## m4l #
########

var = "m4l"

# x-axis label for each four-lepton mass reconstruction.
var_labels = {
    "corr": r"$m_{ll\tau\tau}^\mathrm{corr}$ [GeV]",
    "cons": r"$m_{ll\tau\tau}^\mathrm{cons}$ [GeV]",
    "raw": r"$m_{ll\tau\tau}^\mathrm{raw}$ [GeV]",
}

# One plot per final state and b-tag bin; the ee and mm channels of each final
# state are added together. Data histograms are read at "none", simulation
# (background and signal) at "nom".
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    for bcat, btag_label in [(0, "0-btags"), (1, "btags")]:
        for mass_type in ("cons",):
            mc_s = mc[var][:, ee_ch, bcat, "nom", mass_type, :] + mc[var][:, mm_ch, bcat, "nom", mass_type, :]
            data_s = data[var][:, ee_ch, bcat, "none", mass_type, :] + data[var][:, mm_ch, bcat, "none", mass_type, :]
            data_ss_s = data_ss[var][:, ee_ch, bcat, "none", mass_type, :] + data_ss[var][:, mm_ch, bcat, "none", mass_type, :]
            ggA_s = ggA[var][:, ee_ch, bcat, "nom", mass_type, :] + ggA[var][:, mm_ch, bcat, "nom", mass_type, :]
            bbA_s = bbA[var][:, ee_ch, bcat, "nom", mass_type, :] + bbA[var][:, mm_ch, bcat, "nom", mass_type, :]

            outfile = join(outdir, f"{cat}_{bcat}_{var}_{mass_type}.pdf") if plot else None
            plot_data_vs_mc(
                data_s,
                mc_s,
                "mass",
                cat_label=cat_label,
                var_label=var_labels[mass_type],
                btag_label=btag_label,
                outfile=outfile,
                logscale=True,
                ylim=[0.1, 30],
                year=None,
                lumi=lumi,
                # Signal overlays for the configured mass point.
                ggA=ggA_s, ggA_sigma=5, ggA_mass=mass,
                bbA=bbA_s, bbA_sigma=5, bbA_mass=mass,
                data_ss=data_ss_s,
                blind=True,
                blind_range=None,
            )

In [None]:
#########
## mll ##
#########

var = "mll"

# Only the b-tag-inclusive selection is plotted: the b-tag axis is summed away
# with ::sum, and bcat is used solely in the output file name.
bcat, btag_label = "all", "all-btags"

for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    # Add the ee and mm channels; data is read at "none", simulation at "nom".
    mc_s = mc[var][:, ee_ch, ::sum, "nom", :] + mc[var][:, mm_ch, ::sum, "nom", :]
    data_s = data[var][:, ee_ch, ::sum, "none", :] + data[var][:, mm_ch, ::sum, "none", :]
    data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", :] + data_ss[var][:, mm_ch, ::sum, "none", :]

    outfile = join(outdir, f"{cat}_{bcat}_{var}.pdf") if plot else None
    plot_data_vs_mc(
        data_s,
        mc_s,
        "mll",
        cat_label=cat_label,
        var_label=r"$m_{ll}$ [GeV]",
        btag_label=btag_label,
        outfile=outfile,
        lumi=lumi,
        year=None,
        data_ss=data_ss_s,
        blind=True,
        blind_range=None,
    )

In [None]:
#########
## mtt ##
#########

var = "mtt"

# Candidate blinding windows per mass reconstruction. They are kept for
# reference only: the call below passes blind_range=None.
blind_ranges = {"raw": [3,7], "corr": [3,8], "cons": [7, 12]}

# One b-tag-inclusive plot per final state and di-tau mass reconstruction,
# with the ee and mm channels added together.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    for mass_type in ("raw", "corr", "cons"):
        mc_s = mc[var][:, ee_ch, ::sum, "nom", mass_type, :] + mc[var][:, mm_ch, ::sum, "nom", mass_type, :]
        data_s = data[var][:, ee_ch, ::sum, "none", mass_type, :] + data[var][:, mm_ch, ::sum, "none", mass_type, :]
        data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", mass_type, :] + data_ss[var][:, mm_ch, ::sum, "none", mass_type, :]

        outfile = join(outdir, f"{cat}_all_{var}-{mass_type}.pdf") if plot else None
        plot_data_vs_mc(
            data_s,
            mc_s,
            "mtt",
            cat_label=cat_label,
            var_label=r"Visible $m_{tt}$ [GeV]",
            btag_label="all btags",
            outfile=outfile,
            lumi=lumi,
            year=None,
            data_ss=data_ss_s,
            blind=True,
            blind_range=None,
        )

In [None]:
#########
## MET ##
#########

var = "met"

# One b-tag-inclusive plot per final state, with the ee and mm channels added.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    # Data is read at "none", simulation at "nom"; ::sum removes the b-tag axis.
    mc_s = mc[var][:, ee_ch, ::sum, "nom", :] + mc[var][:, mm_ch, ::sum, "nom", :]
    data_s = data[var][:, ee_ch, ::sum, "none", :] + data[var][:, mm_ch, ::sum, "none", :]
    data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", :] + data_ss[var][:, mm_ch, ::sum, "none", :]

    outfile = join(outdir, f"{cat}_all_{var}.pdf") if plot else None
    plot_data_vs_mc(
        data_s,
        mc_s,
        "met",
        cat_label=cat_label,
        var_label=r"$E_T^\mathrm{miss}$",
        btag_label="all btags",
        outfile=outfile,
        lumi=lumi,
        year=None,
        data_ss=data_ss_s,
        blind=True,
        blind_range=None,
    )

In [None]:
########
## pt ##
########

var = "pt"

# b-tag-inclusive only: the b-tag axis is summed away, so bcat is a
# placeholder and only the label is passed to the plotting routine.
bcat, btag_label = -1, "all"

# One plot per final state and lepton leg, with ee and mm channels added.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    for leg in ("1", "2", "3", "4"):
        mc_s = mc[var][:, ee_ch, ::sum, "nom", leg, :] + mc[var][:, mm_ch, ::sum, "nom", leg, :]
        data_s = data[var][:, ee_ch, ::sum, "none", leg, :] + data[var][:, mm_ch, ::sum, "none", leg, :]
        data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", leg, :] + data_ss[var][:, mm_ch, ::sum, "none", leg, :]

        outfile = join(outdir, f"{cat}_all_{var}-{leg}.pdf") if plot else None
        plot_data_vs_mc(
            data_s,
            mc_s,
            "pt",
            cat_label=cat_label,
            var_label=f"Leg {leg} " + r"$p_T$",
            btag_label=btag_label,
            outfile=outfile,
            lumi=lumi,
            year=None,
            data_ss=data_ss_s,
            blind=True,
            blind_range=None,
        )

In [None]:
# Per-leg eta distributions, b-tag inclusive, ee and mm channels added together.
# NOTE(review): "eta" is not in the `variables` list filled by the setup cell,
# so mc[var] / data[var] / data_ss[var] raise KeyError on a fresh kernel.
# Presumably "eta" has to be added to `variables` there -- confirm that the
# coffea outputs actually contain an "eta" entry before doing so.
var = "eta"
for cat, cat_label in cat_labels.items():
    # bcat is a placeholder: the b-tag axis is summed away with ::sum below.
    for bcat, btag_label in {-1: "all"}.items():
        for leg in ['1', '2', '3', '4']:
            mc_s, data_s, data_ss_s = mc[var], data[var], data_ss[var]
            # Simulation is read at "nom", data at "none".
            mc_s = (
                mc_s[:, "ee" + cat, ::sum, "nom", leg, :] + 
                mc_s[:, "mm" + cat, ::sum, "nom", leg, :]
            )
            data_s = (
                data_s[:, "ee" + cat, ::sum, "none", leg, :] + 
                data_s[:, "mm" + cat, ::sum, "none", leg, :]
            )
            data_ss_s = (
                data_ss_s[:, "ee" + cat, ::sum, "none", leg, :] + 
                data_ss_s[:, "mm" + cat, ::sum, "none", leg, :]
            )
            # Unlike the other opposite-sign cells, this one is plotted unblinded.
            plot_data_vs_mc(
                data_s, 
                mc_s, 
                "eta", cat_label=cat_label, var_label=f"Leg {leg} " + r"$\eta$", btag_label=btag_label,
                outfile=join(outdir, f"{cat}_all_{var}-{leg}.pdf") if plot else None,
                lumi=lumi,
                year=None,
                data_ss=data_ss_s,
                blind=False,
                blind_range=None,
            )

## Same-Sign Cross-Check

In [None]:
#######################
## same-sign control ##
#######################

# ---- Configuration ---------------------------------------------------------
# NOTE: this cell rebinds `years`, `lumi`, `mc`, `data_ss`, `cat_labels`,
# `plot` and `outdir`, which the opposite-sign cells above also read. Re-run
# the first setup cell before going back to those cells.
mass = "300"
years = ["2016preVFP", "2016postVFP", "2017", "2018"]

# The full Run 2 and the combined 2016 selections have dedicated luminosity
# entries and output directories; every other selection sums per-year values.
# Both the luminosity and the output directory are derived from the same two
# flags so that they can never disagree.
is_run2 = set(years) == {"2016preVFP", "2016postVFP", "2017", "2018"}
is_2016 = set(years) == {"2016preVFP", "2016postVFP"}

lumis = get_lumis()
if is_run2:
    lumi = lumis["Run 2"]
elif is_2016:
    lumi = lumis["2016"]
else:
    lumi = sum(lumis[year] for year in years)

# ---- Load histograms -------------------------------------------------------
# Each coffea file is read once per year (not once per variable). For every
# variable the per-dataset histograms are summed, and the result is accumulated
# over the requested years. Accumulators start at 0, exactly like sum() does.
#   data_ss   : same-sign data from the "not-relaxed" file
#   data_ss_r : same-sign data from the "relaxed" file
#   mc        : same-sign simulation
variables = ["m4l", "mll", "mtt", "pt", "met"]
data_ss, data_ss_r, mc = ({var: 0 for var in variables} for _ in range(3))
for year in years:
    relaxed_out = util.load(f'../output_test/data_UL_{year}_SS_ub_relaxed.coffea')
    not_relaxed_out = util.load(f'../output_test/data_UL_{year}_SS_ub_not-relaxed.coffea')
    mc_out = util.load(f"../output_test/MC_UL_{year}_None_SS.coffea")
    for var in variables:
        data_ss_r[var] = data_ss_r[var] + sum(relaxed_out[var].values())
        data_ss[var] = data_ss[var] + sum(not_relaxed_out[var].values())
        mc[var] = mc[var] + sum(mc_out[var].values())

# LaTeX labels of the four di-tau final states, keyed by category suffix.
cat_labels = {
    'tt': r'$ll\tau_h\tau_h$',
    'et': r'$ll e\tau_h$',
    'mt': r'$ll\mu\tau_h$',
    'em': r'$ll e\mu$',
}

# ---- Output location -------------------------------------------------------
plot = True  # when False the plotting cells pass outfile=None
if is_run2:
    outdir = "../plots/data-mc/all"
elif is_2016:
    outdir = "../plots/data-mc/2016"
elif len(years) == 1:
    outdir = f"../plots/data-mc/{years[0]}"
else:
    # Arbitrary combination of years: join them into one directory name.
    year_str = "_".join(years)
    outdir = f"../plots/data-mc/{year_str}"

In [None]:
########
## m4l #
########
from azh_analysis.utils.plotting import plot_data_vs_mc

var = "m4l"

# x-axis label for each four-lepton mass reconstruction.
var_labels = {
    "corr": r"$m_{ll\tau\tau}^\mathrm{corr}$ [GeV]",
    "cons": r"$m_{ll\tau\tau}^\mathrm{cons}$ [GeV]",
    "raw": r"$m_{ll\tau\tau}^\mathrm{vis}$ [GeV]",
}

# Same-sign cross-check: the not-relaxed same-sign data is plotted as "data"
# and the relaxed same-sign data is passed as `data_ss`. The b-tag axis is
# summed away; bcat only appears in the output file names.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    for bcat, btag_label in [(0, "all")]:
        for mass_type, mass_label in [("cons", "cons")]:
            mc_s = mc[var][:, ee_ch, ::sum, "nom", mass_type, :] + mc[var][:, mm_ch, ::sum, "nom", mass_type, :]
            data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", mass_type, :] + data_ss[var][:, mm_ch, ::sum, "none", mass_type, :]
            data_ss_r_s = data_ss_r[var][:, ee_ch, ::sum, "none", mass_type, :] + data_ss_r[var][:, mm_ch, ::sum, "none", mass_type, :]

            # Shared stem for the PDF and the ROOT export of this plot.
            name = f"{cat}_{bcat}_{var}_{mass_label}_SS"
            outfile = join(outdir, name+".pdf") if plot else None
            plot_data_vs_mc(
                data_ss_s,
                mc_s,
                "mass",
                cat_label=cat_label,
                var_label=var_labels[mass_type],
                btag_label=btag_label,
                outfile=outfile,
                rootfile=join("for_alexei", name+".root"),
                logscale=True,
                ylim=[0.01, 30],
                year=None,
                lumi=lumi,
                data_ss=data_ss_r_s,
                blind=False,
                blind_range=None,
            )

In [None]:
#########
## mll ##
#########

var = "mll"

# Only the b-tag-inclusive selection is plotted: the b-tag axis is summed away
# with ::sum, and bcat is used solely in the output file name.
bcat, btag_label = "all", "all-btags"

# Same-sign cross-check: not-relaxed same-sign data versus same-sign MC, with
# the relaxed same-sign data passed as `data_ss`.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    mc_s = mc[var][:, ee_ch, ::sum, "nom", :] + mc[var][:, mm_ch, ::sum, "nom", :]
    data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", :] + data_ss[var][:, mm_ch, ::sum, "none", :]
    data_ss_r_s = data_ss_r[var][:, ee_ch, ::sum, "none", :] + data_ss_r[var][:, mm_ch, ::sum, "none", :]

    outfile = join(outdir, f"{cat}_{bcat}_{var}_SS.pdf") if plot else None
    plot_data_vs_mc(
        data_ss_s,
        mc_s,
        "mll",
        cat_label=cat_label,
        var_label=r"$m_{ll}$ [GeV]",
        btag_label=btag_label,
        outfile=outfile,
        lumi=lumi,
        year=None,
        data_ss=data_ss_r_s,
        blind=False,
        blind_range=None,
    )

In [None]:
#########
## mtt ##
#########

var = "mtt"

# Candidate blinding windows per mass reconstruction. They are kept for
# reference only: the same-sign plots below are unblinded.
blind_ranges = {"raw": [3,7], "corr": [3,8], "cons": [7, 12]}

# Axis-title prefix -> mass reconstruction to plot. "Corrected": "corr" and
# "Constrained": "cons" can be added here to plot the other reconstructions.
mass_types = {"Visible": "raw"}

# Same-sign cross-check: not-relaxed same-sign data versus same-sign MC, with
# the relaxed same-sign data passed as `data_ss`.
for cat, cat_label in cat_labels.items():
    for mass_label, mass_type in mass_types.items():
        mc_s, data_ss_s, data_ss_r_s = mc[var], data_ss[var], data_ss_r[var]
        mc_s = (
            mc_s[:, "ee" + cat, ::sum, "nom", mass_type, :] +
            mc_s[:, "mm" + cat, ::sum, "nom", mass_type, :]
        )
        # Both channels must keep the same binning. The ee term used to be
        # rebinned with ::2j while the mm term, the MC and the relaxed data
        # were not, so the histograms being added and compared disagreed.
        data_ss_s = (
            data_ss_s[:, "ee" + cat, ::sum, "none", mass_type, :] +
            data_ss_s[:, "mm" + cat, ::sum, "none", mass_type, :]
        )
        data_ss_r_s = (
            data_ss_r_s[:, "ee" + cat, ::sum, "none", mass_type, :] +
            data_ss_r_s[:, "mm" + cat, ::sum, "none", mass_type, :]
        )
        plot_data_vs_mc(
            data_ss_s,
            mc_s,
            "mtt", cat_label=cat_label, var_label=f"{mass_label} " + r"$m_{tt}$ [GeV]", btag_label="all btags",
            outfile=join(outdir, f"{cat}_all_{var}-{mass_type}_SS.pdf") if plot else None,
            lumi=lumi,
            year=None,
            data_ss=data_ss_r_s,
            blind=False,
            blind_range=None,
        )

In [None]:
#########
## MET ##
#########

var = "met"

# Same-sign cross-check: not-relaxed same-sign data versus same-sign MC, with
# the relaxed same-sign data passed as `data_ss`. b-tag inclusive, ee + mm.
for cat, cat_label in cat_labels.items():
    ee_ch, mm_ch = "ee" + cat, "mm" + cat
    mc_s = mc[var][:, ee_ch, ::sum, "nom", :] + mc[var][:, mm_ch, ::sum, "nom", :]
    data_ss_s = data_ss[var][:, ee_ch, ::sum, "none", :] + data_ss[var][:, mm_ch, ::sum, "none", :]
    data_ss_r_s = data_ss_r[var][:, ee_ch, ::sum, "none", :] + data_ss_r[var][:, mm_ch, ::sum, "none", :]

    outfile = join(outdir, f"{cat}_all_{var}_SS.pdf") if plot else None
    plot_data_vs_mc(
        data_ss_s,
        mc_s,
        "met",
        cat_label=cat_label,
        var_label=r"$E_T^\mathrm{miss}$",
        btag_label="all btags",
        outfile=outfile,
        lumi=lumi,
        year=None,
        data_ss=data_ss_r_s,
        blind=False,
        blind_range=None,
    )

In [None]:
########
## pt ##
########

var = "pt"

# Same-sign cross-check of the per-leg pT spectra.
# This cell must use the same-sign inputs, like the other same-sign cells:
# not-relaxed same-sign data as "data" and relaxed same-sign data as `data_ss`.
# It previously read `data`, which the same-sign setup cell never defines -- it
# is the opposite-sign data left over from the first setup cell -- and plotted
# it unblinded against same-sign MC.
for cat, cat_label in cat_labels.items():
    # bcat is a placeholder: the b-tag axis is summed away with ::sum below.
    for bcat, btag_label in {-1: "all"}.items():
        for leg in ['1', '2', '3', '4']:
            mc_s, data_ss_s, data_ss_r_s = mc[var], data_ss[var], data_ss_r[var]
            mc_s = (
                mc_s[:, "ee" + cat, ::sum, "nom", leg, :] +
                mc_s[:, "mm" + cat, ::sum, "nom", leg, :]
            )
            data_ss_s = (
                data_ss_s[:, "ee" + cat, ::sum, "none", leg, :] +
                data_ss_s[:, "mm" + cat, ::sum, "none", leg, :]
            )
            data_ss_r_s = (
                data_ss_r_s[:, "ee" + cat, ::sum, "none", leg, :] +
                data_ss_r_s[:, "mm" + cat, ::sum, "none", leg, :]
            )
            plot_data_vs_mc(
                data_ss_s,
                mc_s,
                "pt", cat_label=cat_label, var_label=f"Leg {leg} " + r"$p_T$", btag_label=btag_label,
                outfile=join(outdir, f"{cat}_all_{var}-{leg}_SS.pdf") if plot else None,
                lumi=lumi,
                year=None,
                data_ss=data_ss_r_s,
                blind=False,
                blind_range=None,
            )

In [None]:
import uproot

# Sanity check of the ROOT export written by the same-sign m4l cell: read the
# "data" object back and draw it. The context manager closes the file handle
# once the histogram has been converted and plotted.
with uproot.open("for_alexei/tt_0_m4l_cons_SS.root") as root_file:
    root_file["data"].to_hist().plot1d()