In [11]:
"""
1 July 2021
Abraham Tishelman-Charny

The purpose of this notebook is to plot Data / MC of WWZ in a control region of 0-100 Mgg 
in order to validate the HH->WWgg MC
"""

import uproot
from matplotlib import pyplot as plt 
import numpy as np
import copy 
from collections import OrderedDict
import operator 
import collections 
import pandas as pd 
import awkward as ak
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

import matplotlib 

In [12]:
##-- With DNN score 
# Files with DNN score: For SM SL DNN AN_20_165_v7 (with MET fix)
#direc = ""

#direc = "/eos/user/a/atishelm/ntuples/HHWWgg_DNN/MultiClassifier/HHWWyyDNN_WithHggFactor2-200Epochs-3ClassMulticlass_EvenSingleH_2Hgg_withKinWeightCut10_BalanceYields/"
direc = "/eos/user/a/atishelm/ntuples/HHWWgg_DNN/MultiClassifier/HHWWyyDNN_SL_MultiClass_WithMETFix_12LOHH_500epochs_BalanceYields/DY_CR/"

# p_data = "%s/Data_WWZPhaseSpace_2017_MoreVars.root"%(direc) ##-- DoubleEG data 
p_data = "%s/SingleElectron_Data_2017_MoreVars.root"%(direc) ##-- SingleElectron data 
# p_DYJets = "%s/Zee_2017_MoreVars.root"%(direc)
p_DYJets = "%s/Zee_v14_ext1-v1_hadded_MoreVars.root"%(direc)
p_WWZ = "%s/WWZ_MoreVars.root"%(direc)

# ##-- Without DNN score 
# direc = "/eos/user/a/atishelm/ntuples/HHWWgg_flashgg/WWZ_SignalTopology_Checks/2017/"
# # p_data = "%s/Data_WWZPhaseSpace_2017_MoreVars.root"%(direc) ##-- DoubleEG data 
# p_data = "%s/SingleElectron_Data_2017_hadded/SingleElectron_Data_2017.root"%(direc) ##-- SingleElectron data 
# # p_DYJets = "%s/Zee_2017_MoreVars.root"%(direc)
# p_DYJets = "%s/Zee_hadded/Zee_v14_ext1-v1_hadded.root"%(direc)
# p_WWZ = "%s/WWZ_2017_hadded/WWZ.root"%(direc)

backgrounds = {
     
    
    # gamma jet 
    "GJet_Pt-20to40" : "GJet_20to40",
    "GJet_Pt-20toInf" : "GJet_20toInf",
    "GJet_Pt-40toInf": "GJet_Pt40toInf",
    
    # gamma gamma jet 
    "DiPhotonJetsBox_M40_80" : "DiPho_M40_80",
    "DiPhotonJetsBox_MGG-80toInf" : "DiPho_M80ToInf",
#     "WW_TuneCP5" : "WW",
#     "TTGJets" : "TTGJets",    
    
    # tt(V) gammas
    "TTJets" : "TTJets",
    "TTGG_0Jets" : "TTGG_0Jets",

    "TTWJetsToLNu" : "TTWJetsToLNu",
#     "WGToLNuG_01J" : "WGToLNuG_01J",
    "TTZToLLNuNu" : "TTZToLLNuNu",
    "ZGToLLG_01J" : "ZGToLLG_01J",    
    
    # V(s) + gamma(s) + jets 
    "W1JetsToLNu_LHEWpT_0-50" : "W1JetsToLNu_LHEWpT_0_50",
    "W1JetsToLNu_LHEWpT_50-150" : "W1JetsToLNu_LHEWpT_50_150",
    "W1JetsToLNu_LHEWpT_150-250" : "W1JetsToLNu_LHEWpT_150_250",
    "W1JetsToLNu_LHEWpT_250-400" : "W1JetsToLNu_LHEWpT_250_400",
    "W1JetsToLNu_LHEWpT_400-inf" : "W1JetsToLNu_LHEWpT_400_inf",
    
    "W2JetsToLNu_LHEWpT_0-50" : "W2JetsToLNu_LHEWpT_0_50",
    "W2JetsToLNu_LHEWpT_50-150" : "W2JetsToLNu_LHEWpT_50_150",
    "W2JetsToLNu_LHEWpT_150-250" : "W2JetsToLNu_LHEWpT_150_250",
    "W2JetsToLNu_LHEWpT_250-400" : "W2JetsToLNu_LHEWpT_250_400",
    "W2JetsToLNu_LHEWpT_400-inf" : "W2JetsToLNu_LHEWpT_400_inf",    
    
    "W3JetsToLNu" : "W3JetsToLNu",
    "W4JetsToLNu" : "W4JetsToLNu",    
    
    "WGGJets" : "WGGJets",
    "WGJJToLNu" : "WGJJToLNu",
    "WWTo1L1Nu2Q" : "WWTo1L1Nu2Q",
    "ttWJets" : "ttWJets",
    "ZZ" : "ZZ",    
    
    # H to gg 
    "GluGluHToGG" : "GluGluHToGG",
    "VBFHToGG" : "VBFHToGG",
    "VHToGG" : "VHToGG",
    
#     "W1JetsToLNu_LHEWpT_0-50" : "W1JetsToLNu_LHEWpT_0_50",
#     "W1JetsToLNu_LHEWpT_50-150" : "W1JetsToLNu_LHEWpT_50_150",
#     "W1JetsToLNu_LHEWpT_150-250" : "W1JetsToLNu_LHEWpT_150_250",
#     "W1JetsToLNu_LHEWpT_250-400" : "W1JetsToLNu_LHEWpT_250_400",
#     "W1JetsToLNu_LHEWpT_400-inf" : "W1JetsToLNu_LHEWpT_400_inf",
    
#     "W2JetsToLNu_LHEWpT_0-50" : "W2JetsToLNu_LHEWpT_0_50",
#     "W2JetsToLNu_LHEWpT_50-150" : "W2JetsToLNu_LHEWpT_50_150",
#     "W2JetsToLNu_LHEWpT_150-250" : "W2JetsToLNu_LHEWpT_150_250",
#     "W2JetsToLNu_LHEWpT_250-400" : "W2JetsToLNu_LHEWpT_250_400",
#     "W2JetsToLNu_LHEWpT_400-inf" : "W2JetsToLNu_LHEWpT_400_inf",    
    
#     "W3JetsToLNu" : "W3JetsToLNu",
#     "W4JetsToLNu" : "W4JetsToLNu",
    
}

background_directories = {
    "GJet_Pt-20to40" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "GJet_Pt-20toInf" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "GJet_Pt-40toInf": "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "DiPhotonJetsBox_M40_80" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "DiPhotonJetsBox_MGG-80toInf" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
#     "WW_TuneCP5" : "WW",
#     "TTGJets" : "TTGJets",
    "TTJets" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "TTGG_0Jets" : "Flashgg_Backgrounds_Zee_Preselections_2017_hadded",
    "WGGJets" : "HHWWgg_Backgrounds_ZeePreselections_2017_hadded",
    "WGJJToLNu" : "HHWWgg_Backgrounds_ZeePreselections_2017_hadded",
    "WWTo1L1Nu2Q" : "HHWWgg_Backgrounds_ZeePreselections_2017_hadded",
    "ttWJets" : "HHWWgg_Backgrounds_ZeePreselections_2017_hadded",
    "ZZ" : "More_Backgrounds_hadded",
    "TTWJetsToLNu" : "More_Backgrounds_hadded",
#     "WGToLNuG_01J" : "More_Backgrounds_v2_hadded",
    "TTZToLLNuNu" : "More_Backgrounds_hadded",
    "ZGToLLG_01J" : "More_Backgrounds_v3_hadded",
    
    "GluGluHToGG" : "Higgs_bkg_2017_125Only_hadded",
    "VBFHToGG" : "Higgs_bkg_2017_125Only_hadded",
    "VHToGG" : "Higgs_bkg_2017_125Only_hadded",
    
    ##-- WJets 
    "W1JetsToLNu_LHEWpT_0-50" : "WJets_pTBinned_hadded",
    "W1JetsToLNu_LHEWpT_50-150" : "WJets_pTBinned_hadded",
    "W1JetsToLNu_LHEWpT_150-250" : "WJets_pTBinned_hadded",
    "W1JetsToLNu_LHEWpT_250-400" : "WJets_pTBinned_hadded",
    "W1JetsToLNu_LHEWpT_400-inf" : "WJets_pTBinned_hadded",
    
    "W2JetsToLNu_LHEWpT_0-50" : "WJets_pTBinned_hadded",
    "W2JetsToLNu_LHEWpT_50-150" : "WJets_pTBinned_hadded",
    "W2JetsToLNu_LHEWpT_150-250" : "WJets_pTBinned_hadded",
    "W2JetsToLNu_LHEWpT_250-400" : "WJets_pTBinned_hadded",
    "W2JetsToLNu_LHEWpT_400-inf" : "WJets_pTBinned_hadded",
    
    "W3JetsToLNu" : "WJets_pTBinned_hadded",
    "W4JetsToLNu" : "WJets_pTBinned_hadded"
    
}

for b in backgrounds.keys():
    bName = backgrounds[b]
    bDirec = background_directories[b]
    exec("p_%s = '%s/%s_hadded_MoreVars.root'"%(bName, direc, b)) ##-- With DNN score 
#     exec("p_%s = '%s/%s/%s_hadded.root'"%(bName, direc, bDirec, b))
    exec("f_%s = uproot.open(p_%s)"%(bName, bName))

f_data = uproot.open(p_data)
f_DYJets = uproot.open(p_DYJets)
f_WWZ = uproot.open(p_WWZ)

tags = [str(i) for i in range(0, 1)]
# tags = [str(i) for i in range(2, 4)]
# tags = [str(i) for i in range(0, 3)]
# tags = [str(i) for i in range(0, 4)]

variables = [
    "CMS_hgg_mass",
    "weight",
    "evalDNN_HH",
#     "Leading_Photon_pt",
#     "Subleading_Photon_pt",    
#     "METCor_pt",
#     "METCor_phi"
]

# variables = [
#     "CMS_hgg_mass",
# #     "evalDNN_HH",
#     "weight",
#     "Leading_Photon_pt",
#     "Subleading_Photon_pt",
# #     Scaled_Leading_Photon_pt,
#     "goodJets_0_pt",
#     "goodLepton_pt",
#     "Wmass_goodJets12",
# #     Scaled_Subleading_Photon_pt,
#     "goodJets_1_E",
#     "goodJets_1_pt",
#     "goodLepton_E",
#     "METCor_pt",
#     "goodJets_0_E",
#     "goodLepton_phi",
#     "Leading_Photon_MVA",
#     "goodLepton_eta",
#     "goodJets_1_eta",
#     "goodJets_1_phi",
#     "Subleading_Photon_eta",
# #     Leading_Jet_bscore,
# #     Subleading_Jet_bscore,
#     "Subleading_Photon_phi",
#     "N_goodJets",
#     "goodJets_0_phi",
# #     Scaled_Leading_Photon_E,
# #     Scaled_Subleading_Photon_E,
#     "Leading_Photon_phi",
#     "Subleading_Photon_MVA",
#     "goodJets_0_eta",
#     "Leading_Photon_eta",
#     "Wmt_L"     
# ]

for tag in tags:
    print("On Tag:",tag)
    
    for v in variables:
        print("On variable:",v)
        
        ##-- no tagsDumper/trees
        exec("Data_%s_tag_%s = f_data['Data_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))
        exec("DYJets_%s_tag_%s = f_DYJets['DYJetsToLL_M_50_TuneCP5_13TeV_amcatnloFXFX_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))    
        exec("WWZ_%s_tag_%s = f_WWZ['WWZ_4F_TuneCP5_13TeV_amcatnlo_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))  
        
#         ##-- with tagsDumper/trees
#         exec("Data_%s_tag_%s = f_data['tagsDumper/trees']['Data_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))
#         exec("DYJets_%s_tag_%s = f_DYJets['tagsDumper/trees']['DYJetsToLL_M_50_TuneCP5_13TeV_amcatnloFXFX_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))    
#         exec("WWZ_%s_tag_%s = f_WWZ['tagsDumper/trees']['WWZ_4F_TuneCP5_13TeV_amcatnlo_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))          
        
        for b in backgrounds:
            bName = backgrounds[b]
            exec("bFile = f_%s"%(bName))
            #print("bFile.keys():",bFile.keys())
            bTree = bFile.keys()[0] # taking first
            bTree = bTree.split(';')[0]
            
#             bTree = bFile.keys()[2] ##-- with tagsDumper/trees structure, 3rd should have name without tagsDumper/trees
#             bTree = bTree.split(';')[0]
#             bTree = bTree.replace("'", "")
#             bTree = bTree.replace("_HHWWggTag_0", "")
#             bTree = bTree.replace("_HHWWggTag_1", "")
#             bTree = bTree.replace("_HHWWggTag_2", "")
#             bTree = bTree.replace("tagsDumper/trees", "")
            exec("%s_%s_tag_%s = bFile['%s']['%s'].array()"%(bName, v, tag, bTree, v))  
#             exec("%s_%s_tag_%s = bFile['%s_HHWWggTag_%s']['%s'].array()"%(bName, v, tag, bTree, tag, v))  
#             exec("%s_%s_tag_%s = bFile['tagsDumper/trees']['%s_HHWWggTag_%s']['%s'].array()"%(bName, v, tag, bTree, tag, v))  
        
        ##-- with tagsDumper/trees TDirect structure
#         exec("Data_%s_tag_%s = f_data['tagsDumper/trees']['Data_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))
#         exec("DYJets_%s_tag_%s = f_DYJets['tagsDumper/trees']['DYJetsToLL_M_50_TuneCP5_13TeV_amcatnloFXFX_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))    
#         exec("WWZ_%s_tag_%s = f_WWZ['tagsDumper/trees']['WWZ_4F_TuneCP5_13TeV_amcatnlo_pythia8_13TeV_HHWWggTag_%s']['%s'].array()"%(v, tag, tag, v))           
        
print("DONE")

On Tag: 0
On variable: CMS_hgg_mass
On variable: weight
On variable: evalDNN_HH
DONE


In [13]:
nbins_glob = 20

binDict = {

#     "evalDNN_HH" : [nbins_glob, 0, 1],
#     "evalDNN_HH" : [30, 0.1, 1],
#     "evalDNN_HH" : [9, 0.1, 1],
#     "evalDNN_HH" : [9, 0.1, 1],
    "evalDNN_HH" : [15, 0.1, 1],
#     "evalDNN_HH" : [30, 0.1, 1],
#     "evalDNN_HH" : [3, 0.93571, 1],
#     "evalDNN_HH" : [8, 0.82, 0.93571],
#     "evalDNN_HH" : [10, 0.64, 0.82],
#     "evalDNN_HH" : [70, 0, 1],
    "METCor_phi" : [nbins_glob, -3.15,3.15],
    
    "Leading_Photon_MVA": [nbins_glob,-1,1],
    "Subleading_Photon_MVA": [nbins_glob,-1,1],
#     "CMS_hgg_mass": [nbins_glob,100,180],
    "CMS_hgg_mass": [nbins_glob,80,100],
#     "CMS_hgg_mass": [nbins_glob,0,100],
    "weight":[nbins_glob,-10,10],
    "puweight":[nbins_glob,-2,2],
    "mjj" : [nbins_glob,0,300],
    "e_mT" : [nbins_glob,0,300],
    "mu_mT" : [nbins_glob,0,300],
    "dr_gg" : [nbins_glob,0,3],
    "dr_jj" : [nbins_glob,0,3],
    "pT_gg" : [nbins_glob,0,400],

    "goodJets_0_pt" : [nbins_glob,0,500],
    "goodLepton_pt" : [nbins_glob,0,360],
    "Wmass_goodJets12": [nbins_glob,0,750],
    # "Subleading_Photon_pt/CMS_hgg_mass",
    "goodJets_1_E": [nbins_glob,0,360],
    "goodJets_1_pt": [nbins_glob,0,360],
    "goodLepton_E": [nbins_glob,0,360],
    "METCor_pt": [nbins_glob,0,400],
    "goodJets_0_E": [nbins_glob,0,360],
    "Scaled_Leading_Photon_pt" : [nbins_glob,0,3],
    "Scaled_Subleading_Photon_pt" : [nbins_glob,0,1.5],
    "Scaled_Leading_Photon_E" : [nbins_glob,0,3],
    "Scaled_Subleading_Photon_E" : [nbins_glob,0,1.5],
    "Leading_Jet_bscore" : [nbins_glob,0,1],
    "Subleading_Jet_bscore" : [nbins_glob,0,1],
    "Diphoton_pT" : [nbins_glob,60,250],
    "Wmt_L" : [nbins_glob,0,300],

    ##-- Custom binnings 
    # "Leading_Photon_MVA": [20,-1,1],
    # "Subleading_Photon_MVA": [20,-1,1],
    # "CMS_hgg_mass": [30,100,180],
    # "weight":[1000,-10,10],
    # "puweight":[1000,-2,2],
    # "mjj" : [100,0,300],
    # "e_mT" : [100,0,300],
    # "mu_mT" : [100,0,300],
    # "dr_gg" : [60,0,3],
    # "dr_jj" : [60,0,3],
    # "pT_gg" : [40,0,400],

    # "goodJets_0_pt" : [18,0,360],
    # "goodLepton_pt" : [18,0,360],
    # "Wmass_goodJets12": [25,0,500],
    # # "Subleading_Photon_pt/CMS_hgg_mass",
    # "goodJets_1_E": [18,0,360],
    # "goodJets_1_pt": [18,0,360],
    # "goodLepton_E": [18,0,360],
    # "METCor_pt": [40,0,400],
    # "goodJets_0_E": [18,0,360],
    # "Scaled_Leading_Photon_pt" : [30,0,3],
    # "Scaled_Subleading_Photon_pt" : [15,0,1.5],
    # "Scaled_Leading_Photon_E" : [30,0,3],
    # "Scaled_Subleading_Photon_E" : [15,0,1.5],
    # "Leading_Jet_bscore" : [50,0,1],
    # "Subleading_Jet_bscore" : [50,0,1],
    # "Wmt_L" : [30,0,300],


    # "evalDNN" : [20,0,1.00001] # To include value == 1 
    # "evalDNN" : [10,0,1.00001] # To include value == 1 
    # "evalDNN" : [25,0,1.00001] # To include value == 1 
    # "evalDNN" : [100,0,1.00001] # To include value == 1 
#         "evalDNN" : [nDNNbins,evalDNNmin,evalDNNmax], # To include value == 1 
#         "evalDNN_HH" : [nDNNbins,evalDNNmin,evalDNNmax], # To include value == 1 
    "Subleading_Photon_pt" : [24,0,120]
    # "evalDNN" : [10,0,1] # To include value == 1 
}   


def Add_CMS_Header(plt, lumi, isWide, ax, text, addLumi):
    ##-- Upper left plot text
    ##-- CMS 
    fontSize = 25
    plt.text(
        # 0.05, 0.9, u"CMS $\it{Preliminary}$",
        0., 1., u"CMS ",
        fontsize=fontSize, fontweight='bold',
        horizontalalignment='left',
        verticalalignment='bottom',
        transform=ax.transAxes
    )

    
    if(isWide):
        prelim_x = 0.095
    else:
        prelim_x = 0.12
        
    prelim_x = 0.07
    
    ##-- Preliminary 
    plt.text(
#         prelim_x, 0.996, u"$\it{Preliminary}$",
        prelim_x, 0.9, u"$\it{%s}$"%(text),
        fontsize=(0.95)*(fontSize),
        horizontalalignment='left',
        verticalalignment='bottom',
        transform=ax.transAxes
    )      

    if(addLumi):
        upperRightText = r"%s fb$^{-1}$ (13 TeV)"%(lumi)
    else:
        upperRightText = r"(13 TeV)"

    ##-- Lumi 
    plt.text(
        1., 1., upperRightText,
        fontsize=fontSize, horizontalalignment='right', 
        verticalalignment='bottom', 
        transform=ax.transAxes
    )          

    # specialVars = {
#     "Diphoton_pT" : ["add", "Leading_Photon_pt", "Subleading_Photon_pt"]
# }
specialVars = {
    "Scaled_Leading_Photon_pt" : ["divide", "Leading_Photon_pt", "CMS_hgg_mass"],
    "Scaled_Subleading_Photon_pt" : ["divide", "Subleading_Photon_pt", "CMS_hgg_mass"],
    "Diphoton_pT" : ["add", "Leading_Photon_pt", "Subleading_Photon_pt"]
    
}

variables = [
#     "Diphoton_pT"
#     "CMS_hgg_mass",
#     "METCor_pt",
#     "METCor_phi", # just to check 
    "evalDNN_HH",
#     "Leading_Photon_pt",
#     "Scaled_Leading_Photon_pt",
#     "weight",
]

# variables = [
#     "CMS_hgg_mass",
#     "evalDNN_HH",
# #     "weight",
    
#     "Scaled_Leading_Photon_pt",
#     "Scaled_Subleading_Photon_pt",
    
#     "goodJets_0_pt",
#     "goodLepton_pt",
#     "Wmass_goodJets12",
#     "goodJets_1_E",
#     "goodJets_1_pt",
#     "goodLepton_E",
#     "METCor_pt",
#     "goodJets_0_E",
#     "goodLepton_phi",
#     "Leading_Photon_MVA",
#     "goodLepton_eta",
#     "goodJets_1_eta",
#     "goodJets_1_phi",
#     "Subleading_Photon_eta",
# # #     Leading_Jet_bscore,
# # #     Subleading_Jet_bscore,
#     "Subleading_Photon_phi",
#     "N_goodJets",
#     "goodJets_0_phi",
# # #     Scaled_Leading_Photon_E,
# # #     Scaled_Subleading_Photon_E,
#     "Leading_Photon_phi",
#     "Subleading_Photon_MVA",
#     "goodJets_0_eta",
#     "Leading_Photon_eta",
#     "Wmt_L"     
# ]


background_cats = {
    
    # gamma jet 
    "GJet_Pt-20to40" : r"$\gamma$Jet",
    "GJet_Pt-20toInf" : r"$\gamma$Jet",
    "GJet_Pt-40toInf": r"$\gamma$Jet",
    
    # gamma gamma jet 
    "DiPhotonJetsBox_M40_80" : r"$\gamma\gamma+$jets",
    "DiPhotonJetsBox_MGG-80toInf" : r"$\gamma\gamma+$jets",
#     "WW_TuneCP5" : "WW",
#     "TTGJets" : "TTGJets",
    
    # tt(V) gammas
    "TTJets" : r"tt(V)$\gamma$(s)",
    "TTGG_0Jets" : r"tt(V)$\gamma$(s)",
    "ttWJets" : r"tt(V)$\gamma$(s)",
    "TTWJetsToLNu" : r"tt(V)$\gamma$(s)",
    "TTZToLLNuNu" : r"tt(V)$\gamma$(s)",
    
    # V(s) + gamma(s) + jets 
    "WGGJets" : r"V(s)$+\gamma$(s)$+$jets",
    "WGJJToLNu" : r"V(s)$+\gamma$(s)$+$jets",
    "WWTo1L1Nu2Q" : r"V(s)$+\gamma$(s)$+$jets",
    "ZZ" : r"V(s)$+\gamma$(s)$+$jets",
    "WGToLNuG_01J" : r"V(s)$+\gamma$(s)$+$jets",
    "ZGToLLG_01J" : r"V(s)$+\gamma$(s)$+$jets",
    
    "W1JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
    "W2JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
    "W3JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",
    "W4JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",    
 
    # H to gg 
    "GluGluHToGG" : r"H$\rightarrow\gamma\gamma$",
    "VBFHToGG" : r"H$\rightarrow\gamma\gamma$",
    "VHToGG" : r"H$\rightarrow\gamma\gamma$",    
    
}

# Color(26/255., 188/255., 156/255., "turqoise"),
# Color( 46/255., 204/255., 113/255.,"emerland"      ),
# Color( 52/255., 152/255., 219/255.,"peterriver"   ),
# Color(155/255.,  89/255., 182/255.,"amethyst"      ),
# Color( 52/255.,  73/255.,  94/255.,"wet-asphalt"   ),
# Color( 22/255., 160/255., 133/255.,"green-sea"     ),
# Color( 39/255., 174/255.,  96/255.,"nephritis"     ),
# Color( 41/255., 128/255., 185/255.,"belize-hole"   ),
# Color(142/255.,  68/255., 173/255.,"wisteria"      ),
# Color( 44/255.,  62/255.,  80/255.,"midnight-blue" ),
# Color(241/255., 196/255.,  15/255.,"sunflower"    ),
# Color(230/255., 126/255.,  34/255.,"carrot"        ),
# Color(231/255.,  76/255.,  60/255.,"alizarin"      ),
# Color(236/255., 240/255., 241/255.,"clouds"        ),
# Color(149/255., 165/255., 166/255.,"concrete"      ),
# Color(243/255., 156/255.,  18/255.,"orange"        ),
# Color(211/255.,  84/255.,   0/255.,"pumpkin"       ),
# Color(192/255.,  57/255.,  43/255.,"pomegranate"   ),
# Color(189/255., 195/255., 199/255.,"silver"        ),
# Color(127/255., 140/255., 141/255.,"asbestos"      ),     

# "H\\rightarrow\gamma\gamma": "ROOT.turqoise",
# "\gamma+jet" : "ROOT.peterriver",
# "W\gamma(s)+jets" : "ROOT.wisteria",
# "\gamma\gamma+jets" : "ROOT.sunflower", 
# "tt\gamma(s)+jets":"ROOT.carrot",
# "Signal": "ROOT.alizarin",

cat_colors = {
    "$\gamma$Jet" : (52/255., 152/255., 219/255),
    "$\gamma\gamma+$jets" : (241/255., 196/255.,  15/255.),
    r"tt(V)$\gamma$(s)" : (230/255., 126/255.,  34/255.),
    "other" : "C6",
    r"H$\rightarrow\gamma\gamma$" : (26/255., 188/255., 156/255.),
    r"V(s)$+\gamma$(s)$+$jets" : ((142/255.,  68/255., 173/255))
}

def hist_bin_uncertainty(data, weights, bin_edges):
    """
    The statistical uncertainity per bin of the binned data.
    If there are weights then the uncertainity will be the root of the
    sum of the weights squared.
    If there are no weights (weights = 1) this reduces to the root of
    the number of events.

    Args:
        data: `array`, the data being histogrammed.
        weights: `array`, the associated weights of the `data`.
        bin_edges: `array`, the edges of the bins of the histogram.

    Returns:
        bin_uncertainties: `array`, the statistical uncertainity on the bins.

    Example:
    >>> x = np.array([2,9,4,8])
    >>> w = np.array([0.1,0.2,0.3,0.4])
    >>> edges = [0,5,10]
    >>> hist_bin_uncertainty(x, w, edges)
    array([ 0.31622777,  0.4472136 ])
    >>> hist_bin_uncertainty(x, None, edges)
    array([ 1.41421356,  1.41421356])
    >>> hist_bin_uncertainty(x, np.ones(len(x)), edges)
    array([ 1.41421356,  1.41421356])
    """
    # Bound the data and weights to be within the bin edges
    in_range_index = [idx for idx in range(len(data))
                      if data[idx] > min(bin_edges) and data[idx] < max(bin_edges)]
    in_range_data = np.asarray([data[idx] for idx in in_range_index])

    if weights is None or np.array_equal(weights, np.ones(len(weights))):
        # Default to weights of 1 and thus uncertainty = sqrt(N)
        in_range_weights = np.ones(len(in_range_data))
    else:
        in_range_weights = np.asarray([weights[idx] for idx in in_range_index])

    # Bin the weights with the same binning as the data
    bin_index = np.digitize(in_range_data, bin_edges)
    # N.B.: range(1, bin_edges.size) is used instead of set(bin_index) as if
    # there is a gap in the data such that a bin is skipped no index would appear
    # for it in the set
    binned_weights = np.asarray(
        [in_range_weights[np.where(bin_index == idx)[0]] for idx in range(1, len(bin_edges))])
    bin_uncertainties = np.asarray(
        [np.sqrt(np.sum(np.square(w))) for w in binned_weights])
    return bin_uncertainties


"""
To make shaded uncertainty bars per bin for MC stacks 
"""

# def make_error_boxes(verbose_, ax, xdata, ydata, xerror, yerror, facecolor='r',
#                      edgecolor='none', alpha=0.5):
def make_error_boxes(verbose_, ax, xdata, ydata, xerror, yerror, facecolor='None',
                     edgecolor='none', alpha=0.5):    

    # Loop over data points; create box from errors at each point
    if(verbose_):
        print("xdata:",xdata)
        print("ydata:",ydata)
        print("xerror:",xerror)
        print("yerror:",yerror)
        print("xerror.T:",xerror.T)
        print("yerror.T:",yerror.T)    
        print("zip(xdata, ydata, xerror.T, yerror.T):",zip(xdata, ydata, xerror.T, yerror.T))
    errorboxes = [Rectangle((x - xe[0], y - ye[0]), xe.sum(), ye.sum(), fill=False, hatch='//////', lw=0) for x, y, xe, ye in zip(xdata, ydata, xerror.T, yerror.T)]
#     errorboxes = [Rectangle((x - xe[0], y - ye[0]), xe.sum(), ye.sum(), linewidth=0, fill=None, hatch='///') for x, y, xe, ye in zip(xdata, ydata, xerror.T, yerror.T)]

    # Create patch collection with specified colour/alpha
    pc = PatchCollection(errorboxes, facecolor=facecolor, alpha=alpha,
                         edgecolor=edgecolor)

    # Add collection to axes
    ax.add_collection(pc)

    # Plot errorbars
    for rect in errorboxes:
        ax.add_patch(rect)
        
    #artists = []
    #artists = ax.errorbar(xdata, ydata, xerr=xerror, yerr=yerror,
                          #fmt='none', ecolor='k', elinewidth=0)
    #return artists
    
print("DONE")

DONE


In [14]:
# Make the plot(s)

%matplotlib inline

tagDict = {
    "0" : "Semi-Leptonic",
    "1" : "Fully-Hadronic",
    "2" : "Fully-Leptonic",
    "3" : "Untagged"
}

lumi = 41.5 
isWide = 1
normalize = 0

tags = [str(i) for i in range(0, 1)]
logs = [1]

procs = ["Data", "DYJets", "WWZ"]

for b in backgrounds.keys():
    bName = backgrounds[b]
    procs.append(bName)

for tag in tags:
    print("On tag:",tag)
    tagLabel = tagDict[tag]
    
    for iv, v in enumerate(variables):
        if(v == "weight"): continue ##-- don't plot weight 
        print("On variable:",v)
        
        for log in logs:
            varLabel = copy.copy(v)

            ##-- Define binning 
            if(v in binDict.keys()):
                binInfo = binDict[v]
    
            # If variable is a number of objects
            elif "N_" in v:
                binInfo = [10,0,10]

            # Specified binning if variable has phi, eta or pt in name 
            else:
                # if("phi" in variable_): return [20,-3.14,3.14]
                if("phi" in v): binInfo = [nbins_glob,-3.14,3.15]
                # elif("eta" in variable_): return [16,-4,4]
                elif("eta" in v): binInfo = [nbins_glob,-2.5,2.5]
                elif ("pt" in v): binInfo = [nbins_glob,0,200]   
                elif("bDiscriminator" in v): binInfo = [nbins_glob,0,1]
                else: binInfo = [nbins_glob,0,300] # if variable name meets none of the above conditions, default to this binning     
    
            xbins, xmin, xmax = binInfo
            bins = np.linspace(xmin, xmax, xbins + 1)

            MET_CUT = 0
            print("APPLY MET SELECTION:",MET_CUT)
            ##-- apply semileptonic DNN cut only for SL tag 
            if(tag == "0"):
                for proc in procs:
                    
                    ##-- di-Electron mass 80-100 GeV 
                    if(MET_CUT):
                        ##-- with MET > 40 
                        exec("%s_MASK__ = np.logical_and(%s_CMS_hgg_mass_tag_%s > 80., %s_CMS_hgg_mass_tag_%s < 100.)"%(proc, proc, tag, proc, tag))
                        exec("%s_MASK_ = np.logical_and(%s_MASK__, %s_evalDNN_HH_tag_%s > 0.1)"%(proc, proc, proc, tag))   
                        exec("%s_MASK = np.logical_and(%s_MASK_, %s_METCor_pt_tag_%s > 40.)"%(proc, proc, proc, tag))   
                        
                    else:
                        exec("%s_MASK_ = np.logical_and(%s_CMS_hgg_mass_tag_%s > 80., %s_CMS_hgg_mass_tag_%s < 100.)"%(proc, proc, tag, proc, tag))
                        exec("%s_MASK = np.logical_and(%s_MASK_, %s_evalDNN_HH_tag_%s > 0.1)"%(proc, proc, proc, tag))                           
        
#                     else:
#                         exec("%s_MASK = np.logical_and(%s_MASK_, %s_evalDNN_HH_tag_%s > 0.94)"%(proc, proc, proc, tag))            
#                         exec("%s_MASK = np.logical_and(True, %s_evalDNN_HH_tag_%s > 0.1)"%(proc, proc, tag))            
            else: 
                for proc in procs:
    #                 exec("%s_MASK = np.logical_and(%s_CMS_hgg_mass_tag_%s > 80., %s_CMS_hgg_mass_tag_%s < 100.)"%(proc, proc, tag, proc, tag))
                    exec("%s_MASK = np.logical_and(%s_CMS_hgg_mass_tag_%s > 80, %s_CMS_hgg_mass_tag_%s < 100.)"%(proc, proc, tag, proc, tag))


            ##-- Assuming CMS_hgg_mass and evalDNN_HH already defined:
#             for proc in procs:
#                 exec("%s_MASK_ = np.logical_and(%s_CMS_hgg_mass_tag_%s > 70., %s_CMS_hgg_mass_tag_%s < 100.)"%(proc, proc, tag, proc, tag))
#                 exec("%s_MASK = np.logical_and(%s_MASK_, %s_evalDNN_HH_tag_%s > 0.1)"%(proc, proc, proc, tag))

            ##-- MC weights 
            for proc in procs:
            
                if(v in specialVars.keys()):
                    operation, v1_name, v2_name = specialVars[v]
                    exec("v1 = np.copy(%s_%s_tag_%s)"%(proc, v1_name, tag))
                    exec("v2 = np.copy(%s_%s_tag_%s)"%(proc, v2_name, tag))
                    
                    exec("%s_vals = np.%s(v1, v2)"%(proc, operation))
                    
                else:
                    exec("%s_vals = np.copy(%s_%s_tag_%s)"%(proc, proc, v, tag)) ##-- variable values 
                
                exec("%s_MC_Weights = np.copy(%s_weight_tag_%s)"%(proc, proc, tag)) ##-- MC Weights

                ##-- mask to region 70, 100 of CMS_hgg_mass
                exec("%s_vals = %s_vals[%s_MASK]"%(proc, proc, proc)) ##-- variable values 
                exec("%s_MC_Weights = %s_MC_Weights[%s_MASK]"%(proc, proc, proc)) ##-- MC Weights            

    #         exec("DYJets_MC_Weights = np.copy(DYJets_weight_tag_%s)"%(tag))
    #         exec("WWZ_MC_Weights = np.copy(WWZ_weight_tag_%s)"%(tag))  
    #         exec("Data_MC_Weights = np.copy(Data_weight_tag_%s)"%(tag)) ##-- all 1's for data

            ##-- Scale MC weights 
    #         DYJets_MC_Weights = [float(lumi) * float(w) for w in DYJets_MC_Weights]
    #         WWZ_MC_Weights = [float(lumi) * float(w) for w in WWZ_MC_Weights]        

            all_vals = np.array([])
            all_weights = np.array([]) 
    
            ##-- Get bin heights from numpy histograms 
            for proc in procs:
                exec("%s_binVals, %s_edges, %s_ = plt.hist(%s_vals, bins = bins, weights = %s_MC_Weights)"%(proc, proc, proc, proc, proc))
                plt.close()
                
#                 ##-- Add sum of weights squared from each proc 
                if(proc == "Data"): continue 
                exec("weights_copy = np.copy(%s_MC_Weights)"%(proc))
                weights_copy = [i * 41.5 for i in weights_copy]
                if(proc == "DYJets"):
                    weights_copy = [i * ((6077. / 5765.4)) for i in weights_copy]
                exec("all_vals = np.append(all_vals, ak.to_numpy(%s_vals))"%(proc))
                all_weights = np.append(all_weights, ak.to_numpy(weights_copy))
                    
            
            binned_MC_stat_uncertainties = hist_bin_uncertainty(all_vals, all_weights, bins)
        
            ##-- Scale by lumi 
            DYJets_binVals = [float(value) * float(lumi) * (6077. / 5765.4) for value in DYJets_binVals] ##-- Scale DY by 6077 / 5765.4
            WWZ_binVals = [float(value) * float(lumi) for value in WWZ_binVals]

            for b in backgrounds.keys():
                bName = backgrounds[b]
                exec("%s_binVals = [float(value) * float(lumi) for value in %s_binVals]"%(bName, bName))               
            
            ##-- bins 
            binWidth = (xmax - xmin) / xbins
            binCenters = [float(a) + (float(binWidth)/2.) for a in Data_edges[:-1]] ##-- use data edges since they should all be the same anyway 

            plt.close('all')
            
            ##-- Create plot 
            fig, axarr = plt.subplots(2, 
                                        sharex=True, 
                                        gridspec_kw={
                                            'hspace': 0.15,
                                            'height_ratios': (0.75,0.25)
                                            }
                                        )    
            fig.set_size_inches(7.5, 12)
            upper = axarr[0]
            lower = axarr[1]        

            Data_errors = [np.sqrt(N) for N in Data_binVals]

            ##-- If normalizing MC to yield of data 
            if(normalize):
                Data_Sum = np.sum(Data_binVals)
                DYJets_Sum = np.sum(DYJets_binVals)
                WWZ_Sum = np.sum(WWZ_binVals)
                MC_Sum = DYJets_Sum + WWZ_Sum
                
                DYJets_MC_factor = float(DYJets_Sum) / float(MC_Sum)
                WWZ_MC_factor = float(WWZ_Sum) / float(MC_Sum)
                
                DYJets_factor = ( float(Data_Sum) * float(DYJets_MC_factor) ) / float(DYJets_Sum)
                WWZ_factor = ( float(Data_Sum) * float(WWZ_MC_factor) ) / float(WWZ_Sum)
                
                WWZ_binVals = [WWZ_factor * nonScaledVal for nonScaledVal in WWZ_binVals] 
                DYJets_binVals = [DYJets_factor * nonScaledVal for nonScaledVal in DYJets_binVals] 
            
            
            ##-- Compute the yields once per tag  
            if(iv == 0 and log == 0):      
                Yields = {}
                DYJets_yield = np.sum(DYJets_binVals)
                WWZ_yield = np.sum(WWZ_binVals)
                Yields["DYJets"] = DYJets_yield
                Yields["WWZ"] = WWZ_yield
                for b in backgrounds.keys():
                    bName = backgrounds[b]
                    exec("b_yield = np.sum(%s_binVals)"%(bName))
                    Yields[bName] = b_yield

                exec("sorted_Yields_Tag_%s = sorted(Yields.items(), key=operator.itemgetter(1))"%(tag))

            all_Bkg_weights, all_Bkg_labels = [], [], 
            all_Bkg_weights = [WWZ_binVals]
            all_Bkg_labels = ["WWZ"]
            all_Bkg_colors = [(231/255.,  76/255.,  60/255.)]
            all_edge_colors = [(231/255.,  76/255.,  60/255.)]
            
            for b in backgrounds.keys():
                bName = backgrounds[b]
#                 all_Bkg_bins.append([bins[:-1]])
                exec("all_Bkg_weights.append(%s_binVals)"%(bName))
                b_Cat = background_cats[b]
#                 print("on background category:",b_Cat)
                b_Color = cat_colors[b_Cat]
                all_Bkg_colors.append(b_Color)
                all_edge_colors.append(b_Color)
#                 if(b_Cat not in all_Bkg_labels):
                all_Bkg_labels.append(b_Cat)
                    
            all_Bkg_weights.append(DYJets_binVals)
            all_Bkg_labels.append(r"Z$\rightarrow\ell\ell + $jets")
            all_Bkg_colors.append((149/255., 165/255., 166/255.))
            all_edge_colors.append((149/255., 165/255., 166/255.))
            
            upper.hist((bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1], bins[:-1]), weights = all_Bkg_weights, color = all_Bkg_colors, linewidth=0.0001, bins = bins, histtype = 'stepfilled', label = all_Bkg_labels, zorder = 0, stacked = True)

            MC_Stack_binVals = np.add(DYJets_binVals, WWZ_binVals)
        
#             MC_stack_w2_vals = [] ##-- Fill by bin. 
            for b in backgrounds.keys():
                bName = backgrounds[b]
                exec("MC_Stack_binVals = np.add(MC_Stack_binVals, %s_binVals)"%(bName))

            labelSize = 22.5

            lower.minorticks_on()
            upper.minorticks_on()
    
            lower.tick_params(axis = 'x', labelsize = labelSize, which='both', direction='in', top = True, length=6)
            lower.tick_params(axis = 'y', labelsize = labelSize, which='both', direction='in', right = True, length=6)

            upper.tick_params(axis = 'x', labelsize = labelSize, which='both', direction='in', top = True, length=6)
            upper.tick_params(axis = 'y', labelsize = labelSize, which='both', direction='in', right = True, length=6)
            
            upper.set_ylabel("Entries", fontsize = labelSize)
#             upper.ticklabel_format(style='plain') ##-- Remove scientific notation
            
            xlabelDict = {
                "evalDNN_HH" : "DNN score"
            }
            xlabel = xlabelDict[varLabel]
            
            lower.set_xlabel(xlabel, fontsize = 20)
            lower.set_ylabel("Data / MC", fontsize = 20)
            lower.set_ylim(0.1, 3)
            lower.plot([xmin, xmax],[1,1],linestyle=':', color = 'black')

            zero_errors = [0 for entry in binCenters]

            ratio = np.true_divide(Data_binVals, MC_Stack_binVals, out = np.zeros_like(MC_Stack_binVals), where = MC_Stack_binVals != 0)   

            errors = []
            MC_stack_w2_all = []
            for val_i, d_val in enumerate(Data_binVals):
                MC_Stack_val = MC_Stack_binVals[val_i]
                r_val = ratio[val_i]
                if(MC_Stack_val <= 0): 
                    errors.append(0.)
                    MC_stack_w2_all.append(0.)
                else:
#                     rel_err = np.sqrt( (1 / d_val) + (1 / MC_Stack_val) ) ##-- This is incorrect - need to use sqrt(sum(w^2)) per bin for MC stack uncertainty 
                    MC_stack_w2 = binned_MC_stat_uncertainties[val_i]
                    MC_stack_w2_all.append(MC_stack_w2)
                    rel_err = np.sqrt( (1 / d_val) + (MC_stack_w2 / MC_Stack_val)**2 ) 
                    err = float(rel_err) * r_val
                    errors.append(err)

            lower.errorbar(binCenters, ratio, xerr = zero_errors , yerr = zero_errors, marker = '.', ms = 15, color = 'black', ls = '')  
            
            ymin, ymax = upper.get_ylim()
            if(log):
                upper.set_ylim(0.001, ymax * 100000.)
                upper.set_yscale('log')
                upper.minorticks_on()  

            # add shaded error bars 
            x = np.array(binCenters)
            y = np.array(MC_Stack_binVals)
            y_lowerPlot = np.array([float(1.) for entry in ratio])
            nVals = len(x)
            
            xerr_uppers = [binWidth/2. for i in range(0,nVals)]
            xerr = np.array([xerr_uppers, xerr_uppers])
            
            yerr = np.array([MC_stack_w2_all, MC_stack_w2_all]) #ratio
            yerr_lowerPlot = np.array([errors,errors])
            
#             yerr_lowerPlot = []
            
            verbose_ = 0
            _ = make_error_boxes(verbose_, upper, x, y, xerr, yerr)
            
            # for lower errors, should be ratio errors.
            _ = make_error_boxes(verbose_, lower, x, y_lowerPlot, xerr, yerr_lowerPlot)
            
            # plot data on top of MC error boxes:
            upper.scatter(x = binCenters, y = Data_binVals, color = 'black', s = 60, label = "Data", zorder = 1)
            upper.errorbar(x = binCenters, y = Data_binVals, yerr = Data_errors, color = 'black', fmt = " ", zorder = 2) ##-- fmt = " " somehow removes the line               

            handles, labels = upper.get_legend_handles_labels() ##-- remove legend duplicates 
            by_label = OrderedDict(zip(labels, handles))
            upper.legend(by_label.values(), by_label.keys(), title = "%s"%(tagLabel), fontsize = 15, title_fontsize = 18, ncol = 2, prop={'size': 10}, bbox_to_anchor=(0.95, 0.95)) # x, y, width, height   
            
            if(log):
                logLabel = "log"
            else:
                logLabel = "nonLog"
                
            Add_CMS_Header(plt, lumi, isWide, upper, r"Preliminary", 1)
            
            lower.minorticks_on()
            upper.minorticks_on()    
            
            locmin = matplotlib.ticker.LogLocator(base=10.0, subs=np.arange(2, 10) * .1,
                                                  numticks=100)
            upper.yaxis.set_minor_locator(locmin)
            upper.yaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())            
    
            ol = "/eos/user/a/atishelm/www/HIG-21-014/PAS_HIG_21_014_v9/WWZ_CR_Checks/"
            plt.savefig("%s/%s/%s_HHWWggTag_%s.png"%(ol, logLabel, v, tag), bbox_inches='tight')
            plt.savefig("%s/%s/%s_HHWWggTag_%s.pdf"%(ol, logLabel, v, tag), bbox_inches='tight')
            plt.close('all')
    
print("DONE")    

On tag: 0
On variable: evalDNN_HH
APPLY MET SELECTION: 0


  binned_weights = np.asarray(


DONE


In [56]:
# Fully Hadronic 

In [15]:
nbins_glob = 20

binDict = {

#     "evalDNN_HH" : [nbins_glob, 0, 1],
    "evalDNN_HH" : [30, 0.1, 1],
    "evalDNN_WWvsAll" : [30, 0.1, 1],
    "evalDNN_BBvsAll" : [30, 0, 1],
#     "evalDNN_HH" : [3, 0.93571, 1],
#     "evalDNN_HH" : [8, 0.82, 0.93571],
#     "evalDNN_HH" : [10, 0.64, 0.82],
#     "evalDNN_HH" : [70, 0, 1],
    
    "Leading_Photon_MVA": [nbins_glob,-1,1],
    "Subleading_Photon_MVA": [nbins_glob,-1,1],
#     "CMS_hgg_mass": [nbins_glob,100,180],
#     "CMS_hgg_mass": [nbins_glob,80,100],
    "CMS_hgg_mass": [10,85,95],
#     "CMS_hgg_mass": [nbins_glob,0,100],
    "weight":[nbins_glob,-10,10],
    "puweight":[nbins_glob,-2,2],
    "mjj" : [nbins_glob,0,300],
    "e_mT" : [nbins_glob,0,300],
    "mu_mT" : [nbins_glob,0,300],
    "dr_gg" : [nbins_glob,0,3],
    "dr_jj" : [nbins_glob,0,3],
    "pT_gg" : [nbins_glob,0,400],

    "goodJets_0_pt" : [nbins_glob,0,500],
    "goodLepton_pt" : [nbins_glob,0,360],
    "Wmass_goodJets12": [nbins_glob,0,750],
    # "Subleading_Photon_pt/CMS_hgg_mass",
    "goodJets_1_E": [nbins_glob,0,360],
    "goodJets_1_pt": [nbins_glob,0,360],
    "goodLepton_E": [nbins_glob,0,360],
    "MET_pt": [nbins_glob,0,400],
    "goodJets_0_E": [nbins_glob,0,360],
    "Scaled_Leading_Photon_pt" : [nbins_glob,0,3],
    "Scaled_Subleading_Photon_pt" : [nbins_glob,0,1.5],
    "Scaled_Leading_Photon_E" : [nbins_glob,0,3],
    "Scaled_Subleading_Photon_E" : [nbins_glob,0,1.5],
    "Leading_Jet_bscore" : [nbins_glob,0,1],
    "Subleading_Jet_bscore" : [nbins_glob,0,1],
    "Wmt_L" : [nbins_glob,0,300],

#         "evalDNN" : [nDNNbins,evalDNNmin,evalDNNmax], # To include value == 1 
#         "evalDNN_HH" : [nDNNbins,evalDNNmin,evalDNNmax], # To include value == 1 
    "Subleading_Photon_pt" : [24,0,120]
    # "evalDNN" : [10,0,1] # To include value == 1 
}   



# def Add_CMS_Header(plt, lumi, isWide, ax, text, addLumi):
#     print("addLumi",addLumi)
#     ##-- Upper left plot text
#     ##-- CMS 
#     plt.text(
#         # 0.05, 0.9, u"CMS $\it{Preliminary}$",
#         0., 1., u"CMS ",
#         fontsize=20, fontweight='bold',
#         horizontalalignment='left',
#         verticalalignment='bottom',
#         transform=ax.transAxes
#     )

    
#     if(isWide):
#         prelim_x = 0.095
#     else:
#         prelim_x = 0.12
        
    
#     ##-- Preliminary 
#     plt.text(
# #         prelim_x, 0.996, u"$\it{Preliminary}$",
#         prelim_x, 0.996, u"$\it{%s}$"%(text),
#         fontsize=18,
#         horizontalalignment='left',
#         verticalalignment='bottom',
#         transform=ax.transAxes
#     )      

#     if(addLumi):
#         upperRightText = r"%s fb$^{-1}$ (13 TeV)"%(lumi)
#     else:
#         upperRightText = r"(13 TeV)"

#     ##-- Lumi 
#     plt.text(
#         1., 1., upperRightText,
#         fontsize=16, horizontalalignment='right', 
#         verticalalignment='bottom', 
#         transform=ax.transAxes
#     )      



# ['WWZ', 'DiPho_40to80', 'DiPho_80toInf', 'GJet', 'TTGG', 'TTJets', 'TTWJets', 'TTZ', 'WGGJets', 'WGJJ', 'WW', 'ZZ', 'ttWJets', 'Zee', 'Data']

# background_cats = {
    
#     # gamma jet 
#     "GJet_Pt-20to40" : r"$\gamma$Jet",
#     "GJet_Pt-20toInf" : r"$\gamma$Jet",
#     "GJet_Pt-40toInf": r"$\gamma$Jet",
    
#     # gamma gamma jet 
#     "DiPhotonJetsBox_M40_80" : r"$\gamma\gamma+$jets",
#     "DiPhotonJetsBox_MGG-80toInf" : r"$\gamma\gamma+$jets",
# #     "WW_TuneCP5" : "WW",
# #     "TTGJets" : "TTGJets",
    
#     # tt(V) gammas
#     "TTJets" : r"tt(V)$\gamma$(s)",
#     "TTGG_0Jets" : r"tt(V)$\gamma$(s)",
#     "ttWJets" : r"tt(V)$\gamma$(s)",
#     "TTWJetsToLNu" : r"tt(V)$\gamma$(s)",
#     "TTZToLLNuNu" : r"tt(V)$\gamma$(s)",
    
#     # V(s) + gamma(s) + jets 
#     "WGGJets" : r"V(s)$+\gamma$(s)$+$jets",
#     "WGJJToLNu" : r"V(s)$+\gamma$(s)$+$jets",
#     "WWTo1L1Nu2Q" : r"V(s)$+\gamma$(s)$+$jets",
#     "ZZ" : r"V(s)$+\gamma$(s)$+$jets",
#     "WGToLNuG_01J" : r"V(s)$+\gamma$(s)$+$jets",
#     "ZGToLLG_01J" : r"V(s)$+\gamma$(s)$+$jets",
    
#     "W1JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
#     "W1JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
#     "W1JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
#     "W1JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
#     "W1JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
#     "W2JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
#     "W2JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
#     "W2JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
#     "W2JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
#     "W2JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
#     "W3JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",
#     "W4JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",    
 
#     # H to gg 
#     "GluGluHToGG" : r"H$\rightarrow\gamma\gamma$",
#     "VBFHToGG" : r"H$\rightarrow\gamma\gamma$",
#     "VHToGG" : r"H$\rightarrow\gamma\gamma$",    
    
# }

background_cats = {
    
    "WWZ" : "WWZ",
    "Zee" : r"Z$\rightarrow\ell\ell + $jets",
    
    "GJet_Pt-20to40" : "$\gamma$Jet",
    "GJet_Pt-20toInf" : "$\gamma$Jet",
    "GJet_Pt-40toInf": r"$\gamma$Jet",
    "DiPho_40to80" : "$\gamma\gamma+$jets",
    "DiPho_80toInf" : "$\gamma\gamma+$jets",
    "GJet" : r"\gammaJet",
    
#     "WW_TuneCP5" : "WW",
#     "TTGJets" : "TTGJets",
    
    "ttWJets" : r"tt(V)$\gamma$(s)",
    "TTZ" : r"tt(V)$\gamma$(s)",
    "ttWJets" : r"tt(V)$\gamma$(s)",
    "TTWJets" : r"tt(V)$\gamma$(s)",
    "TTGG" : r"tt(V)$\gamma$(s)",
    "TTJets" : r"tt(V)$\gamma$(s)",
    "TTGG_0Jets" : r"tt(V)$\gamma$(s)",
    
    "WGGJets" : r"V(s)$+\gamma$(s)$+$jets",
    "WGJJ" : r"V(s)$+\gamma$(s)$+$jets",
    "WW" : r"V(s)$+\gamma$(s)$+$jets",
    "ZZ" : r"V(s)$+\gamma$(s)$+$jets",
    "WGToLNuG_01J" : r"V(s)$+\gamma$(s)$+$jets",
    "ZGToLLG_01J" : r"V(s)$+\gamma$(s)$+$jets",    
    "W1JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
    "W1JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
    "W2JetsToLNu_LHEWpT_0-50" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_50-150" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_150-250" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_250-400" : r"V(s)$+\gamma$(s)$+$jets",
    "W2JetsToLNu_LHEWpT_400-inf" : r"V(s)$+\gamma$(s)$+$jets",
    
    "W3JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",
    "W4JetsToLNu" : r"V(s)$+\gamma$(s)$+$jets",    

    
    "GluGluHToGG" : r"H$\rightarrow\gamma\gamma$",
    "VBFHToGG" : r"H$\rightarrow\gamma\gamma$",
    "VHToGG" : r"H$\rightarrow\gamma\gamma$",

    
}


cat_colors = {
    "WWZ" : (231/255.,  76/255.,  60/255.),
    "$\gamma$Jet" : (52/255., 152/255., 219/255),
    "$\gamma\gamma+$jets" : (241/255., 196/255.,  15/255.),
    "tt" : "C4",
    "H$\rightarrow\gamma\gamma$" : (26/255., 188/255., 156/255.),
    r"tt(V)$\gamma$(s)" : (230/255., 126/255.,  34/255.),
    r"Z$\rightarrow\ell\ell + $jets" : (149/255., 165/255., 166/255.),
    r"V(s)$+\gamma$(s)$+$jets" : ((142/255.,  68/255., 173/255)),
    r"tt(V)$\gamma$(s)" : (230/255., 126/255.,  34/255.)
}

#     "$\gamma$Jet" : (52/255., 152/255., 219/255),
#     "$\gamma\gamma+$jets" : (241/255., 196/255.,  15/255.),
#     r"tt(V)$\gamma$(s)" : (230/255., 126/255.,  34/255.),
#     "other" : "C6",
#     r"H$\rightarrow\gamma\gamma$" : (26/255., 188/255., 156/255.),
#     r"V(s)$+\gamma$(s)$+$jets" : ((142/255.,  68/255., 173/255))

print("DONE")

DONE


In [16]:
import uproot
from matplotlib import pyplot as plt 
import numpy as np
import copy 
from collections import OrderedDict
import operator 
import collections 
import pandas as pd 
import os 
import awkward as ak
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection

In [17]:
##-- Define uproot files 

labelDict = {
    "OtherBkg_WWZ_2017_1_CategorizedTrees.root" : "WWZ",
    "OtherBkg_DiPhotonJetsBox_M40_80_2017_1_CategorizedTrees.root" : "DiPho_40to80",
    "OtherBkg_DiPhotonJetsBox_MGG_2017_1_CategorizedTrees.root" : "DiPho_80toInf",
    "OtherBkg_GJet_Pt_2017_1_CategorizedTrees.root" : "GJet",
    "OtherBkg_TTGG_0Jets_2017_1_CategorizedTrees.root" : "TTGG",
    #"OtherBkg_TTGJets_2017_1_CategorizedTrees.root" : "TTGJets",
    "OtherBkg_TTJets_2017_1_CategorizedTrees.root" : "TTJets",
    "OtherBkg_TTWJetsToLNu_2017_1_CategorizedTrees.root" : "TTWJets",
    "OtherBkg_TTZToLLNuNu_2017_1_CategorizedTrees.root" : "TTZ",
    "OtherBkg_WGGJets_2017_1_CategorizedTrees.root" : "WGGJets",
    "OtherBkg_WGJJToLNu_2017_1_CategorizedTrees.root" : "WGJJ",
    #"OtherBkg_WWTo1L1Nu2Q_2017_1_CategorizedTrees.root" : "WW_SL",
    "OtherBkg_WW_TuneCP5_2017_1_CategorizedTrees.root" : "WW",
    "OtherBkg_ZZ_2017_1_CategorizedTrees.root" : "ZZ",
    "OtherBkg_ttWJets_2017_1_CategorizedTrees.root" : "ttWJets",    
    "OtherBkg_Zee_v14_2017_1_CategorizedTrees.root" : "Zee",
    "Data_2017_CategorizedTrees.root" : "Data",
}

procs = [val for val in labelDict.values()]
fNamesInDict = [val for val in labelDict.keys()]
print(procs)
# direc = "/eos/user/r/rasharma/post_doc_ihep/double-higgs/ntuples/January_2021_Production/SignalValidation/CategorizeRootFile_evalDNN_BBvsAll0p60/"
direc = "/eos/user/r/rasharma/post_doc_ihep/double-higgs/ntuples/January_2021_Production/SignalValidation_v2/"
files = [f for f in os.listdir(direc)]
for f in files:
    print(f)
    if(f not in fNamesInDict): continue 
    fullPath = "%s/%s"%(direc, f)
    fileLabel = labelDict[f]
    
    exec("ur_%s = uproot.open('%s')"%(fileLabel, str(fullPath)))


##-- Define variable to define arrays for 
print(fNamesInDict)
print(len(fNamesInDict))
variables_to_save = [
    "CMS_hgg_mass",
    "evalDNN_WWvsAll", 
    "evalDNN_BBvsAll",
    "weight",
    "MET_pt"
]

##-- Define variable arrays 
FH_cats = [i for i in range(0,4)]
# FH_cats = [i for i in range(0,1)]

for f_i, f in enumerate(files):
    fullPath = "%s/%s"%(direc, f)
    if(f not in fNamesInDict): continue 
    fileLabel = labelDict[f]
    
    exec("ur_file = ur_%s"%(fileLabel)) ##-- define temporary uproot file as previous defined one 
    keys = ur_file.keys()
    treeName = keys[0] ##-- Take zeroeth tree name to extract labeled tree name 
    treeName = treeName.replace("_HHWWggTag_FHDNN_0;1", "")
    treeName = treeName.replace("_HHWWggTag_FHDNN_1;1", "")
    treeName = treeName.replace("_HHWWggTag_FHDNN_2;1", "")
    treeName = treeName.replace("_HHWWggTag_FHDNN_3;1", "")
    
    for v in variables_to_save:
        exec("%s_%s_combined = np.array([], dtype = float) "%(fileLabel, v))


        for FH_cat in FH_cats:
            treeNameFull = "%s_HHWWggTag_FHDNN_%s"%(treeName, FH_cat)
            
            """
            fileLabel_variable_FH_cat ---> Array name convention 
            """
            exec("%s_%s_%s = ur_file[treeNameFull]['%s'].array() "%(fileLabel, v, FH_cat, v))
            exec("%s_%s_%s_np = ak.to_numpy(%s_%s_%s)"%(fileLabel, v, FH_cat, fileLabel, v, FH_cat))
            exec("%s_%s_combined = np.append(%s_%s_combined, %s_%s_%s_np)"%(fileLabel, v, fileLabel, v, fileLabel, v, FH_cat))
        
print("DONE")


['WWZ', 'DiPho_40to80', 'DiPho_80toInf', 'GJet', 'TTGG', 'TTJets', 'TTWJets', 'TTZ', 'WGGJets', 'WGJJ', 'WW', 'ZZ', 'ttWJets', 'Zee', 'Data']
Data_2017_CategorizedTrees.root
OtherBkg_DiPhotonJetsBox_M40_80_2017_1_CategorizedTrees.root
OtherBkg_DiPhotonJetsBox_MGG_2017_1_CategorizedTrees.root
OtherBkg_GJet_Pt_2017_1_CategorizedTrees.root
OtherBkg_TTGG_0Jets_2017_1_CategorizedTrees.root
OtherBkg_TTGJets_2017_1_CategorizedTrees.root
OtherBkg_TTJets_2017_1_CategorizedTrees.root
OtherBkg_TTWJetsToLNu_2017_1_CategorizedTrees.root
OtherBkg_TTZToLLNuNu_2017_1_CategorizedTrees.root
OtherBkg_WGGJets_2017_1_CategorizedTrees.root
OtherBkg_WGJJToLNu_2017_1_CategorizedTrees.root
OtherBkg_WWTo1L1Nu2Q_2017_1_CategorizedTrees.root
OtherBkg_WWZ_2017_1_CategorizedTrees.root
OtherBkg_WW_TuneCP5_2017_1_CategorizedTrees.root
OtherBkg_ZZ_2017_1_CategorizedTrees.root
OtherBkg_Zee_v14_2017_1_CategorizedTrees.root
OtherBkg_ttWJets_2017_1_CategorizedTrees.root
['OtherBkg_WWZ_2017_1_CategorizedTrees.root', 'Other

In [18]:
# Make the fully hadronic plot(s)

##-- Plot variables 
variables_to_plot = [
#     "CMS_hgg_mass",
    "evalDNN_WWvsAll",
#     "evalDNN_BBvsAll",
#     "weight"
]
%matplotlib inline

lumi = 41.5 
isWide = 1
normalize = 0
show = 0
FH_cats = ["combined"]
for log in [1]:
    for FH_cat in FH_cats:    
        print("FH_cat:",FH_cat)
        for iv, v in enumerate(variables_to_plot):
            if(v=="weight"): continue ##-- don't plot weight 
            print("Variable:",v)

            varLabel = copy.copy(v)

            ##-- Define binning 
            if(v in binDict.keys()):
                binInfo = binDict[v]

            # If variable is a number of objects
            elif "N_" in v:
                binInfo = [10,0,10]

            # Specified binning if variable has phi, eta or pt in name 
            else:
                # if("phi" in variable_): return [20,-3.14,3.14]
                if("phi" in v): binInfo = [nbins_glob,-3.14,3.15]
                # elif("eta" in variable_): return [16,-4,4]
                elif("eta" in v): binInfo = [nbins_glob,-2.5,2.5]
                elif ("pt" in v): binInfo = [nbins_glob,0,200]   
                elif("bDiscriminator" in v): binInfo = [nbins_glob,0,1]
                else: binInfo = [nbins_glob,0,300] # if variable name meets none of the above conditions, default to this binning     
            if(v == "evalDNN_WWvsAll"):
                perCatBins = {
                    0 : [7, 0.988, 1],
                    1 : [7, 0.97, 0.99],
                    2 : [7, 0.93, 0.98],
                    3 : [7, 0, 0.93],
                    "combined" : [20, 0.1, 1]
                }

                binInfo = perCatBins[FH_cat]

            xbins, xmin, xmax = binInfo
            bins = np.linspace(xmin, xmax, xbins + 1)

            MET_CUT = 0
            
            for proc in procs:
                exec("%s_vals = np.copy(%s_%s_%s)"%(proc, proc, v, FH_cat)) ##-- Variable to plot 
                exec("proc_hgg_mass = %s_CMS_hgg_mass_%s"%(proc, FH_cat)) ##-- CMS_hgg_mass to use for mask 
                exec("proc_evalDNN_WWvsAll = %s_evalDNN_WWvsAll_%s"%(proc, FH_cat))
                exec("%s_MC_Weights = %s_weight_%s"%(proc, proc, FH_cat)) ##-- MC weights for shape 
  
                ##-- Only add selection of mass 
    
                if(MET_CUT):
                    ##-- With MET selection 
                    exec("%s_%s_%s_MASK_ = np.logical_and(proc_hgg_mass > 80, proc_hgg_mass < 100.)"%(proc, v, FH_cat)) ##-- MASK 
                    exec("%s_%s_%s_MASK = np.logical_and(%s_%s_%s_MASK_, %s_MET_pt_%s > 40.)"%(proc, v, FH_cat, proc, v, FH_cat, proc, FH_cat)) ##-- MASK 
            
                else:
                    ##-- Without MET Selection 
                    exec("%s_%s_%s_MASK = np.logical_and(proc_hgg_mass > 85, proc_hgg_mass < 95.)"%(proc, v, FH_cat)) ##-- MASK 
        
                exec("%s_vals = %s_vals[%s_%s_%s_MASK]"%(proc, proc, proc, v, FH_cat))
                exec("%s_MC_Weights = %s_MC_Weights[%s_%s_%s_MASK]"%(proc, proc, proc, v, FH_cat))
                

            all_vals = np.array([])
            all_weights = np.array([])                 
                
            ##-- Get bin heights from numpy histograms 
            for proc in procs:
                exec("%s_binVals, %s_edges, %s_ = plt.hist(%s_vals, bins = bins, weights = %s_MC_Weights)"%(proc, proc, proc, proc, proc))
                plt.close()
                
                
#                 ##-- Add sum of weights squared from each proc 
                if(proc == "Data"): continue 
                exec("weights_copy = np.copy(%s_MC_Weights)"%(proc))
                weights_copy = [i * 41.5 for i in weights_copy]
                if(proc == "DYJets"):
                    weights_copy = [i * ((6077. / 5765.4)) for i in weights_copy]
                exec("all_vals = np.append(all_vals, ak.to_numpy(%s_vals))"%(proc))
                all_weights = np.append(all_weights, ak.to_numpy(weights_copy))                
                
            binned_MC_stat_uncertainties = hist_bin_uncertainty(all_vals, all_weights, bins)                

            ##-- Scale 
            for proc in procs:

                ##-- Special scaling for data and DY:

                ##-- Do not scale data 
                if(proc == "Data"): continue 
                elif(proc == "Zee"):
                    exec("%s_binVals = [float(value) * float(lumi) * (6077. / 5765.4) for value in %s_binVals]"%(proc, proc))
                else:
                    exec("%s_binVals = [float(value) * float(lumi) for value in %s_binVals]"%(proc, proc))


    #         ##-- MC weights 
    #         for proc in procs:

    #             if(v in specialVars.keys()):
    #                 operation, v1_name, v2_name = specialVars[v]
    #                 exec("v1 = np.copy(%s_%s_tag_%s)"%(proc, v1_name, tag))
    #                 exec("v2 = np.copy(%s_%s_tag_%s)"%(proc, v2_name, tag))

    #                 exec("%s_vals = np.%s(v1, v2)"%(proc, operation))

    #             else:
    #                 exec("%s_vals = np.copy(%s_%s_tag_%s)"%(proc, proc, v, tag)) ##-- variable values 


            ##-- bins 
            binWidth = (xmax - xmin) / xbins
            binCenters = [float(a) + (float(binWidth)/2.) for a in Data_edges[:-1]] ##-- use data edges since they should all be the same anyway 

            ##-- Create plot 
            fig, axarr = plt.subplots(2, 
                                        sharex=True, 
                                        gridspec_kw={
                                            'hspace': 0.15,
                                            'height_ratios': (0.75,0.25)
                                            }
                                        )    
            fig.set_size_inches(7.5, 12)
            upper = axarr[0]
            lower = axarr[1]      

            Data_errors = [np.sqrt(N) for N in Data_binVals]

            ##-- Compute the yields once per tag  
            if(iv == 0 and log == 0):      
                Yields = {}
                for proc in procs:
                    exec("%s_yield = np.sum(%s_binVals)"%(proc, proc))
                    exec("Yields['%s'] = %s_yield"%(proc, proc))
#                 DYJets_yield = np.sum(DYJets_binVals)
#                 WWZ_yield = np.sum(WWZ_binVals)
#                 Yields["DYJets"] = DYJets_yield
#                 Yields["WWZ"] = WWZ_yield
#                 for b in backgrounds.keys():
#                     bName = backgrounds[b]
#                     exec("b_yield = np.sum(%s_binVals)"%(bName))
#                     Yields[bName] = b_yield
                exec("sorted_Yields = sorted(Yields.items(), key=operator.itemgetter(1))")
    
            upper.scatter(x = binCenters, y = Data_binVals, color = 'black', s = 60, label = "Data", zorder = 1)
            upper.errorbar(x = binCenters, y = Data_binVals, yerr = Data_errors, color = 'black', fmt = " ", zorder = 2) ##-- fmt = " " somehow removes the line   
            all_Bkg_weights, all_Bkg_labels, all_Bkg_colors = [], [], []
            N_nonZeroBackgrounds = 0 
            for proc in procs:
                if(proc == "Data"): continue
                exec("vals = np.copy(%s_vals)"%(proc))
                if(len(vals) == 0): continue 
                N_nonZeroBackgrounds += 1 
                exec("all_Bkg_weights.append(%s_binVals)"%(proc))
                proc_cat = background_cats[proc]
                b_Color = cat_colors[proc_cat]
                all_Bkg_labels.append(proc_cat)
                all_Bkg_colors.append(b_Color)                
            allBins_l = [bins[:-1] for i in range(N_nonZeroBackgrounds)]
            allBins = tuple(allBins_l)
            upper.hist(allBins, 
                       weights = all_Bkg_weights, 
                       bins = bins, 
                       histtype = 'stepfilled', 
                       label = all_Bkg_labels, 
                       zorder = 0, 
                       stacked = True,
                       color = all_Bkg_colors
                      )
            MC_Stack_binVals = np.array([0] * xbins)
            for proc in procs:
                if(proc == "Data"): continue 
                exec("MC_Stack_binVals = np.add(MC_Stack_binVals, %s_binVals)"%(proc))
            lower.tick_params(axis = 'x', labelsize = 13)
            upper.tick_params(axis = 'y', labelsize = 13)
            
            
            
            labelSize = 22.5

            lower.minorticks_on()
            upper.minorticks_on()
    
            lower.tick_params(axis = 'x', labelsize = labelSize, which='both', direction='in', top = True, length=6)
            lower.tick_params(axis = 'y', labelsize = labelSize, which='both', direction='in', right = True, length=6)

            upper.tick_params(axis = 'x', labelsize = labelSize, which='both', direction='in', top = True, length=6)
            upper.tick_params(axis = 'y', labelsize = labelSize, which='both', direction='in', right = True, length=6)            
            
            
            
            upper.set_ylabel("Entries", fontsize = 20)
            upper.ticklabel_format(style='plain') ##-- Remove scientific notation
            
            xlabelDict = {
                "evalDNN_WWvsAll" : "DNN score"
            }
            xlabel = xlabelDict[varLabel]            
            
            lower.set_xlabel(xlabel, fontsize = 20)
            lower.set_ylabel("Data / MC", fontsize = 20)
            lower.set_ylim(0.1, 3)
            lower.plot([xmin, xmax],[1,1],linestyle=':', color = 'black')
            zero_errors = [0 for entry in binCenters]
            ratio = np.true_divide(Data_binVals, MC_Stack_binVals, out = np.zeros_like(MC_Stack_binVals), where = MC_Stack_binVals != 0)   
            errors = []        
            
            
            MC_stack_w2_all = []
            for val_i, d_val in enumerate(Data_binVals):
                MC_Stack_val = MC_Stack_binVals[val_i]
                MC_bin_W2 = binned_MC_stat_uncertainties[val_i]
                r_val = ratio[val_i]
                if(MC_Stack_val <= 0): 
                    errors.append(0.)
                    MC_stack_w2_all.append(0.)
                else:
                    rel_err = np.sqrt( (1 / d_val) + (MC_bin_W2 / MC_Stack_val)**2 )
                    #MC_stack_w2 = binned_MC_stat_uncertainties[val_i]
                    MC_stack_w2_all.append(MC_bin_W2)                    
                    err = float(rel_err) * r_val
                    errors.append(err)
#             lower.errorbar(binCenters, ratio, xerr = zero_errors , yerr = errors, marker = '.', color = 'black', ls = '')  
            lower.errorbar(binCenters, ratio, xerr = zero_errors , yerr = zero_errors, marker = '.',  ms = 15, color = 'black', ls = '')  
            Add_CMS_Header(plt, lumi, isWide, upper, "Preliminary", 1)
            
            handles, labels = upper.get_legend_handles_labels() ##-- remove legend duplicates 
            by_label = OrderedDict(zip(labels, handles))
            upper.legend(by_label.values(), by_label.keys(), title = "Fully-Hadronic", fontsize = 15, title_fontsize = 18, ncol = 2, prop={'size': 10}, bbox_to_anchor=(0.95, 0.95)) # x, y, width, height   
            
            # add shaded error bars 
            x = np.array(binCenters)
            y = np.array(MC_Stack_binVals)
            nVals = len(x)
            
            xerr_uppers = [binWidth/2. for i in range(0,nVals)]
            xerr = np.array([xerr_uppers, xerr_uppers])
            
            yerr_uppers = MC_stack_w2_all
            
            yerr_uppers = [float(100.) for i in range(0,nVals)]
            yerr = np.array([MC_stack_w2_all, MC_stack_w2_all]) #ratio    
            
            y_lowerPlot = np.array([float(1.) for entry in ratio])
            
            yerr_lowerPlot = np.array([errors, errors])
            
            verbose_ = 0
            _ = make_error_boxes(verbose_, upper, x, y, xerr, yerr)            
            
            # for lower errors, should be ratio errors.
            _ = make_error_boxes(verbose_, lower, x, y_lowerPlot, xerr, yerr_lowerPlot)            
            
            ymin, ymax = upper.get_ylim()
            if(log):
                upper.set_ylim(0.01, ymax * 100000.)
                upper.set_yscale('log')
            else:
                ymin, ymax = upper.get_ylim()
                upper.set_ylim(ymin, ymax * 2)
            if(log):
                logLabel = "log"
            else:
                logLabel = "nonLog"
            ol = "/eos/user/a/atishelm/www/HIG-21-014/PAS_HIG_21_014_v9/WWZ_CR_Checks/log/"

            lower.minorticks_on()
            upper.minorticks_on()    
            
            locmin = matplotlib.ticker.LogLocator(base=10.0, subs=np.arange(2, 10) * .1,
                                                  numticks=100)
            upper.yaxis.set_minor_locator(locmin)
            upper.yaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())              
            
            if(show):
                plt.show()
            else:
                plt.savefig("%s/%s_HHWWggTag_1_%s.png"%(ol, v, logLabel), bbox_inches='tight')
                plt.savefig("%s/%s_HHWWggTag_1_%s.pdf"%(ol, v, logLabel), bbox_inches='tight')

            plt.close()    
    
print("DONE")    

FH_cat: combined
Variable: evalDNN_WWvsAll


  binned_weights = np.asarray(


DONE
