In [1]:
import os
import time
import glob
import re
from functools import reduce
import numpy as np
import uproot
import uproot_methods
import awkward
import pandas as pd
from klepto.archives import dir_archive


import coffea.processor as processor
from coffea.processor.accumulator import AccumulatorABC
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray

%matplotlib inline
import matplotlib.pyplot as plt

In [7]:
#this cell for plotting NN score
import os
import time
import glob
import re
import pandas as pd
from functools import reduce
from klepto.archives import dir_archive

import numpy as np
from tqdm.auto import tqdm
import coffea.processor as processor
from coffea.processor.accumulator import AccumulatorABC
from coffea.analysis_objects import JaggedCandidateArray
from coffea.btag_tools import BTagScaleFactor
from coffea import hist
import pandas as pd
import uproot_methods
import uproot
import awkward
import copy

from memory_profiler import profile

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

from Tools.config_helpers import *
from Tools.helpers import mergeArray, mt

from Tools.objects import Collections
from Tools.cutflow import Cutflow

# Select the non-interactive Agg backend so that matplotlib does not open any
# interactive windows.
# NOTE(review): this call comes after `%matplotlib inline` and after pyplot has
# already been imported in this cell, so it presumably replaces the inline
# backend selected above -- confirm that this is intended.
matplotlib.use('Agg')

In [8]:
def pad_and_flatten(val):
    """Pad `val` to one entry per row, fill the padding with 0. and flatten it.

    The chain ``val.pad(1, clip=True).fillna(0.).flatten()`` is attempted first
    (presumably `val` is an awkward jagged array -- TODO confirm). If any step
    of that chain raises ``AttributeError`` -- e.g. because `val` has no
    ``pad`` method -- the function falls back to a plain ``val.flatten()``.
    """
    try:
        # Keep all three calls inside the try: an AttributeError from any of
        # them triggers the fallback below.
        padded = val.pad(1, clip=True)
        filled = padded.fillna(0.)
        flat = filled.flatten()#.reshape(-1, 1)
    except AttributeError:
        flat = val.flatten()
    return flat

#model = tf.keras.models.load_model('../ML/data/training.h5')#, custom_objects=None, compile=False)

#model._make_predict_function()
#graph = tf.get_default_graph()

#def run_model(inputs):
#    global graph
#    with graph.as_default():
#        outputs = model.predict(inputs)
#    return outputs

# Select the Theano backend for Keras. The environment variable is set before
# the import on the next line; presumably Keras reads KERAS_BACKEND when it is
# first imported, so the order of these two statements matters -- TODO confirm.
os.environ['KERAS_BACKEND'] = 'theano'
from keras.models import load_model

#model = load_model('../ML/data/training.h5')

In [9]:
import sys
# Raise the interpreter's recursion limit to 10000 and print the new value.
# NOTE(review): presumably needed so that deeply nested objects (e.g. the
# processor instance holding the Keras model) can be serialised for the worker
# processes -- confirm, and remove if it is no longer required.
sys.setrecursionlimit(10000)
print(sys.getrecursionlimit())

10000


In [38]:
#Let's define our processor first. 

class WHhadProcessor(processor.ProcessorABC):
    """Coffea processor that histograms the leading H-tagged fat jet in three MET bins.

    For each chunk, `process` builds muon, electron, fat-jet and jet collections
    from the flat branches, applies the event selection and fills three
    histograms (``h_pt_met200``, ``h_pt_met400``, ``h_pt_met600``), one per MET
    bin (250-400, 400-600 and >600).

    The neural network and its normalisation constants are loaded in
    `__init__`, but the NN evaluation in `process` is currently disabled.
    """

    def __init__(self):
        
        ## load the NN
        # Only used by the (currently disabled) NN evaluation in `process`.
        self.model = load_model('../ML/data/lostLep_Z_backgrounds/training.h5')
        self.stds  = pd.read_json('../ML/data/lostLep_Z_backgrounds/stds.json').squeeze()
        self.means = pd.read_json('../ML/data/lostLep_Z_backgrounds/means.json').squeeze()
        
        #Great, now let's define some bins for our histograms.
        
        dataset_axis         = hist.Cat("dataset", "Primary dataset")
        #pt_axis              = hist.Bin("pt", r"$p_{T}$ (GeV)", 500, 0, 2000)
        pt_axis              = hist.Bin("pt", r"$p_{T}$ (GeV)", 15, 0, 300)
        multiplicity_axis    = hist.Bin("multiplicity", r"N", 30, -0.5, 29.5)
        phi_axis             = hist.Bin("phi", r"$\Delta \phi$", 80, 0, 8)
        mass_axis            = hist.Bin("mass", r"mass (GeV)", 500, 0, 2000)
        r_axis               = hist.Bin("r", r"$\Delta R$", 80, 0, 4)
        score_axis           = hist.Bin("score", r"NN Score", 10, 0, 1)

        #In order to create proper histograms, we always need to include a dataset axis!
        #For different types of histograms with different scales, I create axis to fit 
        #those dimensions!
        
        #Now, let's move to actually telling our processor what histograms we want to make.
        #Let's start out simple. 
        # Only the three active entries are filled in `process`; the commented-out
        # entries belong to the disabled fill statements there.
        self._accumulator = processor.dict_accumulator({
            "h_pt_met200":                          hist.Hist("Counts", dataset_axis, pt_axis),
            "h_pt_met400":                          hist.Hist("Counts", dataset_axis, pt_axis),
            "h_pt_met600":                          hist.Hist("Counts", dataset_axis, pt_axis),
            #"met":                          hist.Hist("Counts", dataset_axis, pt_axis),
            #"ht":                           hist.Hist("Counts", dataset_axis, pt_axis),
            #"jet_pt":                       hist.Hist("Counts", dataset_axis, pt_axis),
            #"njets":                        hist.Hist("Counts", dataset_axis, multiplicity_axis),
            #"bjets":                        hist.Hist("Counts", dataset_axis, multiplicity_axis),
            #"min_dphi_met_j1":              hist.Hist("Counts", dataset_axis, phi_axis),
            #"min_dphi_met_j2":              hist.Hist("Counts", dataset_axis, phi_axis),
            #"min_dphi_met_j3":              hist.Hist("Counts", dataset_axis, phi_axis),
            #"min_dphi_met_j4":              hist.Hist("Counts", dataset_axis, phi_axis),
            #"dphi_j1_j2":                   hist.Hist("Counts", dataset_axis, phi_axis),
            #"dphi_fj1_fj2":                 hist.Hist("Counts", dataset_axis, phi_axis),
            #"dR_fj1_fj2":                   hist.Hist("Counts", dataset_axis, r_axis),
            #"NN_score":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_mt":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_met":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_fatjet":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_bjet":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_mindphi":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_jetdphi":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_met_mt":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_met_mt_fatjet":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_met_mt_fatjet_bjet":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"NN_sel_mindphi_jetdphi":                     hist.Hist("Counts", dataset_axis, score_axis),
            #"m_FatJet_softdrop":            hist.Hist("Counts", dataset_axis, mass_axis),

        })

    #Make sure to plug in the dataset axis and the properly binned axis you created above.
    #Cool. Now let's define some properties of the processor.
    
    #First is this guy. He does important things so always include him. 
    @property
    def accumulator(self):
        """Return the dict accumulator holding the histograms defined in `__init__`."""
        return self._accumulator

    #Now comes the fun part. Here's where we tell our processor exactly what to do with the data.
    def process(self, df):
        """
        Processing function. This is where the actual analysis happens.

        Parameters
        ----------
        df : mapping of branch name -> array for one chunk; ``df["dataset"]``
            holds the dataset label used for the histogram ``dataset`` axis.

        Returns
        -------
        A fresh copy of the accumulator (``self.accumulator.identity()``) with
        the three ``h_pt_met*`` histograms filled for this chunk.
        """
        output = self.accumulator.identity()
        dataset = df["dataset"]
        cfg = loadConfig()  # NOTE(review): `cfg` is not used anywhere below.
        
        ## MET -> can switch to puppi MET
        met_pt  = df["MET_pt"]
        met_phi = df["MET_phi"]
        
        ## Muons
        muon = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt = df['Muon_pt'].content,
            eta = df['Muon_eta'].content,
            phi = df['Muon_phi'].content,
            mass = df['Muon_mass'].content,
            miniPFRelIso_all=df['Muon_miniPFRelIso_all'].content,
            looseId =df['Muon_looseId'].content
            )
        muon = muon[(muon.pt > 10) & (abs(muon.eta) < 2.4) & (muon.looseId) & (muon.miniPFRelIso_all < 0.2)]
        #muon = Collections(df, "Muon", "tightTTH").get() # this needs a fix for DASK
        
        # NOTE(review): `electrons` (plural) is never used below; the selection uses
        # the `electron` collection built right after it.
        electrons = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'].content, 
            eta=df['Electron_eta'].content, 
            phi=df['Electron_phi'].content,
            mass=df['Electron_mass'].content,
            pdgid=df['Electron_pdgId'].content,
            mini_iso=df['Electron_miniPFRelIso_all'].content
        )
        
        ## Electrons
        electron = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt = df['Electron_pt'].content,
            eta = df['Electron_eta'].content,
            phi = df['Electron_phi'].content,
            mass = df['Electron_mass'].content,
            miniPFRelIso_all=df['Electron_miniPFRelIso_all'].content,
            cutBased=df['Electron_cutBased'].content
            )
        electron = electron[(electron.pt>10) & (abs(electron.eta) < 2.4) & (electron.miniPFRelIso_all < 0.1) &  (electron.cutBased >= 1)]
        #electron = Collections(df, "Electron", "tightTTH").get() # this needs a fix for DASK
        
        ## FatJets
        fatjet = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt = df['FatJet_pt'].content,
            eta = df['FatJet_eta'].content,
            phi = df['FatJet_phi'].content,
            mass = df['FatJet_mass'].content,
            msoftdrop = df["FatJet_msoftdrop"].content,  
            deepTagMD_HbbvsQCD = df['FatJet_deepTagMD_HbbvsQCD'].content, 
            deepTagMD_WvsQCD = df['FatJet_deepTagMD_WvsQCD'].content, 
            deepTag_WvsQCD = df['FatJet_deepTag_WvsQCD'].content
            
        )
        
        # arccos(cos(x)) folds an azimuthal difference into [0, pi].
        leadingFatJets = fatjet[:,:2]
        difatjet = leadingFatJets.choose(2)
        dphiDiFatJet = np.arccos(np.cos(difatjet.i0.phi-difatjet.i1.phi))
        
        htag = fatjet[((fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD > 0.8365))]
        htag_hard = fatjet[((fatjet.pt > 300) & (fatjet.deepTagMD_HbbvsQCD > 0.8365))]
        
        lead_htag = htag[htag.pt.argmax()]
        
        # W tags are required to fail the H-tag score cut, so the two tag
        # collections do not share fat jets.
        wtag = fatjet[((fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD < 0.8365) & (fatjet.deepTag_WvsQCD > 0.918))]
        wtag_hard = fatjet[((fatjet.pt > 300) & (fatjet.deepTagMD_HbbvsQCD < 0.8365) & (fatjet.deepTag_WvsQCD > 0.918))]
        
        lead_wtag = wtag[wtag.pt.argmax()]
        
        wh = lead_htag.cross(lead_wtag)
        # Fixed: the raw phi difference was passed to arccos without the cos() wrap
        # used by every other delta-phi in this method, which yields NaN whenever
        # the difference lies outside [-1, 1].
        wh_deltaPhi = np.arccos(np.cos(wh.i0.phi - wh.i1.phi))
        wh_deltaR = wh.i0.p4.delta_r(wh.i1.p4)
        
        ## Jets
        jet = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt = df['Jet_pt'].content,
            eta = df['Jet_eta'].content,
            phi = df['Jet_phi'].content,
            mass = df['Jet_mass'].content,
            jetId = df['Jet_jetId'].content, # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
            #puId = df['Jet_puId'].content, # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJetID
            btagDeepB = df['Jet_btagDeepB'].content, # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
            #deepJet = df['Jet_'].content # not there yet?
        )
        
        jet       = jet[(jet.pt>30) & (jet.jetId>1) & (abs(jet.eta)<2.4)]
        jet       = jet[~jet.match(muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~jet.match(electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        jet       = jet[jet.pt.argsort(ascending=False)] # sort the jets
        btag      = jet[(jet.btagDeepB>0.4184)]
        light     = jet[(jet.btagDeepB<0.4184)]
        
        ## Get the leading b-jets
        high_score_btag = jet[jet.btagDeepB.argsort(ascending=False)][:,:2]
        
        leading_jet    = jet[jet.pt.argmax()]
        leading_b      = btag[btag.pt.argmax()]
        
        bb = high_score_btag.choose(2)
        bb_deltaPhi = np.arccos(np.cos(bb.i0.phi-bb.i1.phi))
        bb_deltaR = bb.i0.p4.delta_r(bb.i1.p4)
        
        mtb = mt(btag.pt, btag.phi, met_pt, met_phi)
        min_mtb = mtb.min()
        
        ## other variables
        ht = jet.pt.sum()
        
        # Smallest delta-phi between MET and any of the (up to) four leading jets.
        min_dphiJetMet4 = np.arccos(np.cos(jet[:,:4].phi-met_phi)).min()
        
        leadingJets = jet[:,:2]
        dijet = leadingJets.choose(2)
        dphiDiJet = np.arccos(np.cos(dijet.i0.phi-dijet.i1.phi))

        ## evaluate NN
        # first, prepare the inputs.
        # A .max() can ensure that the flattened array has the full length, but we rather use our pad_and_flatten function        
        # sorting in training: ['mll', 'njet', 'nbtag', 'st', 'ht', 'met', 'mjj_max', 'mlb_min', 'mlb_max', 'l0_pt', 'l1_pt', 'deltaR_lj_min', 'j0_pt']
        
        # The NN evaluation below is disabled (kept as a bare string literal).
        '''NN_inputs = np.stack([
            # normalize
            pad_and_flatten( (metpt - self.means['met'])/self.stds['met'] ),
            pad_and_flatten( (ht - self.means['ht'])/self.stds['ht'] ),
            pad_and_flatten( (lead_jet_pt - self.means['lead_jet_pt'])/self.stds['lead_jet_pt'] ),
            pad_and_flatten( (sublead_jet_pt - self.means['sublead_jet_pt'])/self.stds['sublead_jet_pt'] ),
            pad_and_flatten( (njets - self.means['njets'])/self.stds['njets'] ),
            pad_and_flatten( (nbjets - self.means['bjets'])/self.stds['bjets'] ),
            pad_and_flatten( (wtagged_mc.counts - self.means['nWs'])/self.stds['nWs'] ),
            pad_and_flatten( (htagged.counts - self.means['nHs'])/self.stds['nHs'] ),
            pad_and_flatten( (nfatjets - self.means['nFatJets'])/self.stds['nFatJets'] ),
            pad_and_flatten( (met_sig - self.means['met_significance'])/self.stds['met_significance'] ),
            pad_and_flatten( (abs_min_dphi_met_leadjs4 - self.means['min_dphi_met_j4'])/self.stds['min_dphi_met_j4'] ),
        ])
        
        NN_inputs = np.moveaxis(NN_inputs, 0, 1)
        NN_score = self.model.predict(NN_inputs)'''
        
       
        #Now it's time to make some selections. I'm going to guess that you can follow
        #what I'm doing from here. 

        ht_ps = (ht > 300)
        # MET bins use strict inequalities on both sides, so events with MET of
        # exactly 400 or 600 fall into none of the three bins.
        met_g250 = (met_pt>250)
        met_l400 = (met_pt<400)
        met_bin1 = met_g250 & met_l400
        met_g400 = (met_pt>400)
        met_l600 = (met_pt<600)
        met_bin2 = met_g400 & met_l600
        met_bin3 = (met_pt>600)
        njet_cut = (jet.counts>=2)
        njet_veto = (jet.counts<=5)
        njet_ps = njet_cut & njet_veto
        bjet_ps = (btag.counts>=1)
        fatjet_sel = (fatjet.counts >=1)
        inc_fatjet_sel = (fatjet.counts >=2)
        #mt_sel = (min_mt_b_met > 200).any()
        
        min_dphi_sel = (min_dphiJetMet4>0.5)
        dphi_sel = (dphiDiJet.min()<2.5)
        fatjet_dphi_sel = (dphiDiFatJet<2.5).all()

        e_sel = (electron.counts == 0)
        m_sel = (muon.counts == 0)
        #it_sel = (veto_it.counts == 0)
        #t_sel = (veto_t.counts == 0)
        #l_sel = e_sel & m_sel & it_sel & t_sel
        # The active lepton requirement is exactly one selected electron or muon;
        # the zero-lepton masks e_sel / m_sel above are not used.
        l_sel = ((electron.counts + muon.counts) == 1)
        
        h_sel =(htag.counts>0) 
        wmc_sel = (wtag.counts>0) 

        
        #sel = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & h_sel & wmc_sel
        #sel = ht_ps & met_ps & njet_ps & bjet_ps & fatjet_sel & l_sel & h_sel & min_dphi_sel & dphi_sel & fatjet_dphi_sel
        # The three selections differ only in the MET bin.
        sel1 = l_sel & njet_ps & bjet_ps & min_dphi_sel & dphi_sel & fatjet_dphi_sel & ht_ps & inc_fatjet_sel & h_sel & wmc_sel & met_bin1
        sel2 = l_sel & njet_ps & bjet_ps & min_dphi_sel & dphi_sel & fatjet_dphi_sel & ht_ps & inc_fatjet_sel & h_sel & wmc_sel & met_bin2
        sel3 = l_sel & njet_ps & bjet_ps & min_dphi_sel & dphi_sel & fatjet_dphi_sel & ht_ps & inc_fatjet_sel & h_sel & wmc_sel & met_bin3

        '''nn_sel = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel #& wmc_sel
        nn_sel_mt = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & mt_sel & h_sel #& wmc_sel
        nn_sel_met = ht_ps & high_met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel #& wmc_sel
        nn_sel_fatjet = ht_ps & high_met_ps & njet_ps & bjet_ps & l_sel & inc_fatjet_sel & h_sel #& wmc_sel
        nn_sel_bjet = ht_ps & high_met_ps & njet_ps & inc_bjet_ps & l_sel & fatjet_sel & h_sel #& wmc_sel
        nn_sel_mindphi = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel & min_dphi_sel #& wmc_sel
        nn_sel_jetdphi = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel & dphi_sel #& wmc_sel
        nn_sel_met_mt = ht_ps & high_met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & mt_sel & h_sel #& wmc_sel
        nn_sel_met_mt_fatjet = ht_ps & high_met_ps & njet_ps & bjet_ps & l_sel & inc_fatjet_sel & mt_sel & h_sel #& wmc_sel
        nn_sel_met_mt_fatjet_bjet = ht_ps & high_met_ps & njet_ps & inc_bjet_ps & l_sel & inc_fatjet_sel & mt_sel & h_sel #& wmc_sel
        nn_sel_mindphi_jetdphi = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel & min_dphi_sel & dphi_sel #& wmc_sel'''
            
    
        #Let's make sure we weight our events properly.
        # Per-event weights of the selected events, scaled by a constant factor.
        # NOTE(review): the factor 60 is presumably an integrated luminosity (the
        # disabled code uses 137) -- confirm.
        #wght = df['weight'][sel] * 137
        wght1 = df['weight'][sel1] * 60
        wght2 = df['weight'][sel2] * 60
        wght3 = df['weight'][sel3] * 60
        '''nn_wght = df['weight'][nn_sel] * 137
        nn_mt_wght = df['weight'][nn_sel_mt] * 137
        nn_met_wght = df['weight'][nn_sel_met] * 137
        nn_fatjet_wght = df['weight'][nn_sel_fatjet] * 137
        nn_bjet_wght = df['weight'][nn_sel_bjet] * 137
        nn_mindphi_wght = df['weight'][nn_sel_mindphi] * 137
        nn_jetdphi_wght = df['weight'][nn_sel_jetdphi] * 137
        nn_met_mt_wght = df['weight'][nn_sel_met_mt] * 137
        nn_met_mt_fatjet_wght = df['weight'][nn_sel_met_mt_fatjet] * 137
        nn_met_mt_fatjet_bjet_wght = df['weight'][nn_sel_met_mt_fatjet_bjet] * 137
        nn_mindphi_jetdphi_wght = df['weight'][nn_sel_mindphi_jetdphi] * 137
        '''#fj_wght = ((fatjets[sel].pt>0)*df['weight'][sel].flatten()) * 137
        
        
        #Let's fill some histograms. 
        # Every selection requires h_sel (at least one H tag), so lead_htag[selN]
        # flattens to one value per selected event and lines up with the weights.
        # NOTE(review): the histograms are named h_pt_* and use the 'pt' axis, but
        # the value filled is the leading H-tagged fat jet *mass* -- confirm which
        # quantity is intended.
        output['h_pt_met200'].fill(dataset=dataset, pt=lead_htag[sel1].mass.flatten(), weight=wght1)
        output['h_pt_met400'].fill(dataset=dataset, pt=lead_htag[sel2].mass.flatten(), weight=wght2)
        output['h_pt_met600'].fill(dataset=dataset, pt=lead_htag[sel3].mass.flatten(), weight=wght3)
        
        '''output['met'].fill(dataset=dataset, pt=metpt[sel].flatten(), weight=wght)
        output['ht'].fill(dataset=dataset, pt=ht[sel].flatten(), weight=wght)
        #output['jet_pt'].fill(dataset=dataset, pt=jetpt_sorted[sel].flatten(), weight=wght)
        output['njets'].fill(dataset=dataset, multiplicity=njets[sel].flatten(), weight=wght)
        output['bjets'].fill(dataset=dataset, multiplicity=nbjets[sel].flatten(), weight=wght)   
        output['min_dphi_met_j1'].fill(dataset=dataset, phi=abs_min_dphi_met_leadjs1[sel].flatten(), weight=wght)
        output['min_dphi_met_j2'].fill(dataset=dataset, phi=abs_min_dphi_met_leadjs2[sel].flatten(), weight=wght)
        output['min_dphi_met_j3'].fill(dataset=dataset, phi=abs_min_dphi_met_leadjs3[sel].flatten(), weight=wght)
        output['min_dphi_met_j4'].fill(dataset=dataset, phi=abs_min_dphi_met_leadjs4[sel].flatten(), weight=wght)
        output['dphi_j1_j2'].fill(dataset=dataset, phi=abs_dphi_j1_j2[sel].flatten(), weight=wght)'''
        #output['dphi_fj1_fj2'].fill(dataset=dataset, phi=abs_dphi_fj1_fj2[sel].flatten(), weight=wght)
        #output['dR_fj1_fj2'].fill(dataset=dataset, r=dR_fj1_fj2[sel].flatten(), weight=wght)
        #output['NN_score'].fill(dataset=dataset, score=NN_score[sel].flatten(), weight=wght)
        '''output['NN_sel'].fill(dataset=dataset, score=NN_score[nn_sel].flatten(), weight=nn_wght)
        output['NN_sel_mt'].fill(dataset=dataset, score=NN_score[nn_sel_mt].flatten(), weight=nn_mt_wght)
        output['NN_sel_met'].fill(dataset=dataset, score=NN_score[nn_sel_met].flatten(), weight=nn_met_wght)
        output['NN_sel_fatjet'].fill(dataset=dataset, score=NN_score[nn_sel_fatjet].flatten(), weight=nn_fatjet_wght)
        output['NN_sel_bjet'].fill(dataset=dataset, score=NN_score[nn_sel_bjet].flatten(), weight=nn_bjet_wght)
        output['NN_sel_mindphi'].fill(dataset=dataset, score=NN_score[nn_sel_mindphi].flatten(), weight=nn_mindphi_wght)
        output['NN_sel_jetdphi'].fill(dataset=dataset, score=NN_score[nn_sel_jetdphi].flatten(), weight=nn_jetdphi_wght)
        output['NN_sel_met_mt'].fill(dataset=dataset, score=NN_score[nn_sel_met_mt].flatten(), weight=nn_met_mt_wght)
        output['NN_sel_met_mt_fatjet'].fill(dataset=dataset, score=NN_score[nn_sel_met_mt_fatjet].flatten(), weight=nn_met_mt_fatjet_wght)
        output['NN_sel_met_mt_fatjet_bjet'].fill(dataset=dataset, score=NN_score[nn_sel_met_mt_fatjet_bjet].flatten(), weight=nn_met_mt_fatjet_bjet_wght)
        output['NN_sel_mindphi_jetdphi'].fill(dataset=dataset, score=NN_score[nn_sel_mindphi_jetdphi].flatten(), weight=nn_mindphi_jetdphi_wght)
        '''#output['m_FatJet_softdrop'].fill(dataset=dataset, mass=fatjets[sel].softdrop.flatten(), weight=fj_wght)
        #Notice I have put .flatten() next to the data I'm inputting. This makes my
        #data arrays the appropriate format to input into histograms. 
        
        #Return that output, hunty!
        return output

    #Remember this bad boy and we're done with this block of code!
    
    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged; no post-processing is applied."""
        return accumulator

In [39]:
# Directory holding the nanoAOD samples. Every sample below is matched as
# <SAMPLE_DIR>/<directory pattern>/*.root, so pointing the notebook at another
# production only requires changing this one line.
SAMPLE_DIR = '/hadoop/cms/store/user/ksalyer/allHadTest/0p1p20'

# Dataset label -> directory patterns (relative to SAMPLE_DIR) merged into it.
# The signal ('mC750_l1') is kept separate from the backgrounds so that it can
# be drawn in a different style from the stacked backgrounds.
SAMPLE_PATTERNS = {
    'mC750_l1': ['WH_had_750_1_nanoAOD'],
    'WJets':    ['WJetsToLNu*'],
    'QCD':      ['QCD_HT*'],
    'TTJets':   ['TTJets*'],
    'ZNuNu':    ['ZJetsToNuNu*'],
    'ST':       ['ST*'],
    'ttW/ttZ':  ['ttWJets*', 'ttZJets*'],
    'WW/WZ/ZZ': ['WW*', 'WZ*', 'ZZTo2L2Nu*', 'ZZTo2Q2Nu*'],
}

# Expand the patterns into file lists; files of several patterns are
# concatenated in the order the patterns are listed above.
fileset = {
    label: [path
            for pattern in patterns
            for path in glob.glob(os.path.join(SAMPLE_DIR, pattern, '*.root'))]
    for label, patterns in SAMPLE_PATTERNS.items()
}

# Run the processor over all samples in parallel worker processes.
output = processor.run_uproot_job(fileset,
                                    treename='Events',
                                    processor_instance=WHhadProcessor(),
                                    executor=processor.futures_executor,
                                    executor_args={'workers': 12, 'function_args': {'flatten': False}},
                                    chunksize=500000,
                                 )

#Here, we have the ability to change the 'workers' and 'chunksize', but to be honest,
#it does not make that much of a difference unless you want to see your progress bar 
#get updated more or less often. Totally a personal choice.




In [40]:
# Plot styling only: the dictionaries below are passed to hist.plot1d by the
# plotting helpers and do not affect the histogram contents.

# Signal style: a red line ...
lineopts = dict(color='r')

# ... and red '_' markers without a connecting line for the signal points and
# their error bars.
data_err_opts = dict(
    linestyle='none',
    marker='_',
    markersize=10.,
    color='r',
    elinewidth=1,
)

# Background style: one colour per entry, used for the line colours in the
# shape plots and for the fill colours in the stacked plots.
lineopts2 = dict(
    color=['#7FB069', '#5171A5', '#E2AEDD', '#A33B20', '#680E4B', '#F6AE2D', '#45503B'],
)
fillopts1 = dict(
    edgecolor=(0,0,0,0.3),
    facecolor=['#7FB069', '#5171A5', '#E2AEDD', '#A33B20', '#680E4B', '#F6AE2D', '#45503B'],
    # Alternative palette:
    # facecolor=['#1467cc', '#51d673', '#f7d969', '#af84f0', '#4f842e', '#1ff4ff', '#3612ab'],
)

#Here are two special functions I wrote to help you easily print histos to your output
#directory. All you need to input is the signal and background histograms, the output
#directory and the name of the histogram. 

def savefig(hists, outdir, name, signal='mC750_l1'):
    """Save a log-scale plot of the stacked backgrounds with the signal overlaid.

    Parameters
    ----------
    hists : coffea histogram with a "dataset" axis.
    outdir : str
        Directory the PDF is written to.
    name : str
        Base name of the output file; the plot is saved as ``<name>_log.pdf``.
    signal : str, optional
        Dataset label drawn as the (unstacked) signal. All other datasets are
        stacked as background. Defaults to ``'mC750_l1'``.
    """
    # Negative look-ahead: keeps every dataset label that the signal name does
    # not match (presumably anchored at the start of the label -- this depends
    # on how coffea applies the pattern).
    bkgonly = re.compile('(?!{})'.format(re.escape(signal)))
    ax = hist.plot1d(hists[bkgonly], overlay="dataset", density=False, stack=True, 
                fill_opts = fillopts1, overflow = 'over')
    # Draw the signal on the same axes explicitly rather than relying on
    # matplotlib's implicit "current axes"; clear=False keeps the background
    # stack that was just drawn.
    hist.plot1d(hists[signal], ax=ax, clear=False, overlay="dataset", density=False, stack=False, 
                error_opts=data_err_opts, overflow = 'over') 
    ax.set_yscale('log')
    ax.set_ylim(0.001,1000000)
    ax.figure.savefig(os.path.join(outdir, "{}_log.pdf".format(name)))
    # Leave an empty axes behind so the next plot starts from a clean state.
    ax.clear()

def savefigshape(hists, outdir, name):
    """Save a log-scale shape comparison of all datasets in `hists`.

    Every dataset is drawn unstacked with ``density=True``, and the figure is
    written to ``<outdir>/<name>_shape_log.pdf``. The axes are cleared
    afterwards.
    """
    axes = hist.plot1d(
        hists,
        overlay="dataset",
        density=True,
        stack=False,
        line_opts=lineopts2,
        overflow='over',
    )

    axes.set_yscale('log')
    axes.set_ylim(0.00001,10)

    target = os.path.join(outdir, "{}_shape_log.pdf".format(name))
    axes.figure.savefig(target)
    axes.clear()

In [41]:
# Names of the histograms to plot. Each entry must be a key of the processor
# output; the loop that rebins and plots iterates over this list.
histograms = [
    "h_pt_met200",
    "h_pt_met400",
    "h_pt_met600",
]
# Currently disabled (add the name to the list above to plot it again):
#   met, ht, jet_pt, njets, bjets,
#   min_dphi_met_j1, min_dphi_met_j2, min_dphi_met_j3, min_dphi_met_j4,
#   dphi_j1_j2, dphi_fj1_fj2, dR_fj1_fj2,
#   NN_score, NN_sel, NN_sel_mt, NN_sel_met, NN_sel_fatjet, NN_sel_bjet,
#   NN_sel_mindphi, NN_sel_jetdphi, NN_sel_met_mt, NN_sel_met_mt_fatjet,
#   NN_sel_met_mt_fatjet_bjet, NN_sel_mindphi_jetdphi, mFatJet_softdrop

# Output directory for the plots -- make sure it points to a directory you can
# write to!
outdir = "/home/users/ksalyer/CMSSW_10_2_9/src/tW_scattering/tutorialPlots/"

In [42]:
# Rebinning recipe per histogram:
#     name -> (axis name, axis label, number of bins, lower edge, upper edge)
# The label given here also becomes the axis title of the plot. Histograms
# without an entry are plotted with the binning they were filled with.
# Remember that the same binning is applied to signal and background, since
# both live in the same histogram.
rebin_specs = {
    "h_pt_met200": ('pt', r'Lead Higgs pT', 15, 0, 300),
    "h_pt_met400": ('pt', r'Lead Higgs pT', 15, 0, 300),
    "h_pt_met600": ('pt', r'Lead Higgs pT', 15, 0, 300),
    # Recipes for histograms that are currently disabled in the processor;
    # uncomment the matching line when a histogram is enabled again.
    #"met":                       ('pt', r'MET', 26, 248, 1600),
    #"ht":                        ('pt', r'HT', 25, 300, 2000),
    #"jet_pt":                    ('pt', r'jet $p_{T}$', 25, 300, 2000),
    #"njets":                     ('multiplicity', r'nJets', 16, -0.5, 15.5),
    #"bjets":                     ('multiplicity', r'nBJets', 7, -0.5, 6.5),
    #"min_dphi_met_j1":           ('phi', r' $|min(\Delta \Phi$(MET, lead 1 jet))|', 20, 0 , 4),
    #"min_dphi_met_j2":           ('phi', r' $|min(\Delta \Phi$(MET, lead 2 jets))|', 20, 0 , 4),
    #"min_dphi_met_j3":           ('phi', r' $|min(\Delta \Phi$(MET, lead 3 jets))|', 20, 0 , 4),
    #"min_dphi_met_j4":           ('phi', r' $|min(\Delta \Phi$(MET, lead 4 jets))|', 20, 0 , 4),
    #"dphi_j1_j2":                ('phi', r' $|\Delta \Phi$(leading 2 jets)|', 20, 0 , 4),
    #"dphi_fj1_fj2":              ('phi', r' $|\Delta \Phi$(leading 2 FatJets)|', 20, 0 , 4),
    #"dR_fj1_fj2":                ('r', r' $\Delta R$', 20, 0 , 4),
    # NOTE: the earlier NN recipes built the new axis with name 'r' while
    # rebinning the 'score' axis; 'score' is used for both here.
    #"NN_score":                  ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel":                    ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_mt":                 ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_met":                ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_fatjet":             ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_bjet":               ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_mindphi":            ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_jetdphi":            ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_met_mt":             ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_met_mt_fatjet":      ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_met_mt_fatjet_bjet": ('score', r'NN Score', 10, 0 , 1),
    #"NN_sel_mindphi_jetdphi":    ('score', r'NN Score', 10, 0 , 1),
    #"mFatJet_softdrop":          ('mass', r'FatJet softdrop mass', 12, 0 , 300),
}

for name in histograms:
    print (name)
    hists = output[name]

    if name in rebin_specs:
        axis_name, axis_label, n_bins, low_edge, high_edge = rebin_specs[name]
        # A fresh axis object is created for every histogram.
        hists = hists.rebin(axis_name, hist.Bin(axis_name, axis_label, n_bins, low_edge, high_edge))

    # Write the stacked (signal + background) plot and the shape comparison to
    # `outdir`; check that directory once the loop has finished.
    savefig(hists,outdir, name)
    savefigshape(hists,outdir, name)

h_pt_met200




h_pt_met400




h_pt_met600


