## Data/MC Comparison for Brem-Induced Clusters

#### Comparison of Clusters in Data vs MC. This is to validate the signal reconstruction process. We compare cluster from Z->MuMu events in Data vs those from a DY->ZMuMu (50-120 GeV for MLL). Data is from 2023B&C, and MC is from the preBPix, normalized to the appropriate value 

In [1]:
import numpy as np
import pandas as pd
import uproot
import matplotlib.pyplot as plt
import sys
sys.path.insert(0,"../")
import mplhep as hep
import pickle
import glob
import ROOT as rt
import coffea
import awkward as ak
from coffea import hist, processor
from coffea.nanoevents.methods import candidate
from coffea.nanoevents.methods import vector
import os



Welcome to JupyROOT 6.28/00


  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


In [2]:
#import helper modules for muon scale factor computation
sys.path.append("/uscms/home/amalbert/nobackup/CMSSW_14_1_0_pre4/src/RazorCommon/Tools/bin")
import importlib
import getMuonScaleFactor

#### Load ntuples as awkward arrays

In [3]:
ak.behavior.update(candidate.behavior)

def getLZDF(f,nEvents=-1,version="new"): #lazy dataframe with events that have cluster matched to probe muon
    events_raw = uproot.open(f)['MuonSystem']
    df = coffea.processor.LazyDataFrame(events_raw,entrystop=nEvents)
    start,stop = df._branchargs['entry_start'],df._branchargs['entry_stop']
    events = uproot.lazy(df._tree)
    #events = events[start:stop]
    return events

In [4]:
#paths
MC_paths = {#"2022":"/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/MC_noiseFilters/MC_Summer22/DYto2Mu_MLL-50to120_keepMDSHits_Merged/DYto2Mu_MLL-50to120_keepMDSHits_7980pb_weighted.root",
            "2022EE":"/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/MC_noiseFilters_fixed/MC_Summer22EE/DYto2Mu_MLL-50to120_keepMDSHits_Merged/DYto2Mu_MLL-50to120_keepMDSHits_26642pb_weighted.root",
            "2023":"/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/MC_noiseFilters_fixed/MC_Summer23/DYto2Mu_MLL-50to120_Merged/DYto2Mu_MLL-50to120_18411pb_weighted.root",
            "2023BPix":"/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/MC_noiseFilters_fixed/MC_Summer23BPix/DYto2Mu_MLL-50to120_Merged/DYto2Mu_MLL-50to120_9451pb_weighted.root"}

data_path_lists = {#"2022":["/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2022_Merged/Muon_Run2022C_PromptReco-v1_goodLumi.root",
                   #       "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2022_Merged/Muon_Run2022D_PromptReco-v1_goodLumi.root"],
                 
                   "2022EE":["/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2022_Merged/Muon_Run2022E_PromptReco-v1_goodLumi.root",
                          "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2022_Merged/Muon_Run2022F_PromptReco-v1_goodLumi.root",
                            "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2022_Merged/Muon_Run2022G_PromptReco-v1_goodLumi.root"],
                   
                    "2023":["/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023B_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023B_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023C_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023C_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023C_PromptReco-v2_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023C_PromptReco-v2_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023C_PromptReco-v3_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023C_PromptReco-v3_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023C_PromptReco-v4_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023C_PromptReco-v4_goodLumi.root"],
                  
                      "2023BPix":["/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023D_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023D_PromptReco-v1_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon0_Run2023D_PromptReco-v2_goodLumi.root",
                     "/store/group/lpclonglived/amalbert/Data_MC_Comp_TnP/results_from_cache_noSkim/Data_noiseFilters_fixed/2023_Merged/Muon1_Run2023D_PromptReco-v2_goodLumi.root"]}

In [5]:
events_MC_full_dict= {}
for campaign, MC_path in MC_paths.items():
    print(campaign)
    if campaign!="2022EE":continue
    events_MC_full = getLZDF("root://cmseos.fnal.gov/"+MC_path)
    events_MC_full = events_MC_full[events_MC_full.nCscRechitClusters>0]
    events_MC_full = events_MC_full[np.logical_and(events_MC_full.ZMass>50, events_MC_full.ZMass<120)]
    events_MC_full = events_MC_full[events_MC_full.Flag_all]
    events_MC_full = events_MC_full[events_MC_full.Flag_ecalBadCalibFilter]
    events_MC_full = events_MC_full[events_MC_full.jetVeto]
    events_MC_full_dict[campaign] = events_MC_full
    
events_data_full_dict= {}
for campaign, data_path_list in data_path_lists.items():
    print(campaign)
    if campaign!="2022EE":continue
    data_events = [getLZDF("root://cmseos.fnal.gov/"+data_path) for data_path in data_path_list]
    events_data_full = ak.concatenate(data_events, axis=0)
    events_data_full = events_data_full[events_data_full.nCscRechitClusters>0]
    events_data_full = events_data_full[np.logical_and(events_data_full.ZMass>50, events_data_full.ZMass<120)]
    events_data_full = events_data_full[events_data_full.Flag_all]
    events_data_full = events_data_full[events_data_full.Flag_ecalBadCalibFilter]
    events_data_full = events_data_full[events_data_full.jetVeto]
    events_data_full_dict[campaign] = events_data_full

2022EE
2023
2023BPix
2022EE
2023
2023BPix


In [6]:
MC_kFactors = {"2022":1,"2022EE":0.8425135156354998,"2023":0.9287192347533128,"2023BPix":0.9470132857601179}

### modify the input ntuples so that each entry corresponds with a cluster. As a result, some entries will be repeated twice (tne ones denoted "branch names") if there are two clusters in the event. At this step, all of the branches that we compute for the measurement should be included

In [7]:
#define cluster level csc branches needed
csc_branches = []; csc_chamber_hit_branches = []
for branch_name in events_MC_full.fields: 
    if "csc" in branch_name and "dt" not in branch_name and "LLP" not in branch_name and "DNN" not in branch_name:
        csc_branches.append(branch_name)
    if "cscRechitClusterNRechit" in branch_name:
        csc_chamber_hit_branches.append(branch_name)
csc_chamber_hit_branches = np.array(csc_chamber_hit_branches)
ME11MinusIndex = np.where(csc_chamber_hit_branches=="cscRechitClusterNRechitChamberMinus11")[0]
ME12MinusIndex = np.where(csc_chamber_hit_branches=="cscRechitClusterNRechitChamberMinus12")[0]
ME11PlusIndex = np.where(csc_chamber_hit_branches=="cscRechitClusterNRechitChamberPlus11")[0]
ME12PlusIndex = np.where(csc_chamber_hit_branches=="cscRechitClusterNRechitChamberPlus12")[0]

forward_chamber_field_indices = [ME11MinusIndex, ME12MinusIndex, ME11PlusIndex, ME12PlusIndex]
#event-level branches        
branch_names = ["runNum", "evtNum", "weight", "pileupWeight", "ZMass", "met", "metPhi", "puppiMet", "puppiMetPhi"]


In [8]:
#make more useful input awkward array, with all information in cluster-level format
def getClusterBranches(LZDF, campaign, isMC=False):
    new_df = ak.zip({field: ak.flatten(LZDF[field]) for field in csc_branches})
    
    #compute cluster max chamber
    hits_by_chamber = np.stack([ak.flatten(LZDF[branch]) for branch in csc_chamber_hit_branches], axis=1)
    #print(hits_by_chamber)
    #print(np.array(hits_by_chamber).shape)
    maxBranchIndex = np.argmax(hits_by_chamber, axis=1)
    #print(maxBranchIndex)
    chamber_masks_lists = []
    for chamber_index in forward_chamber_field_indices:
        chamber_masks_lists.append((maxBranchIndex==chamber_index))
        #print(np.where(maxBranchIndex==chamber_index))
    #print(np.stack(chamber_masks_lists, axis=1))
    new_df = ak.with_field(new_df, np.any(np.stack(chamber_masks_lists, axis=1), axis=1), "forward_max_chamber")
    
    print("finished csc branches")
    newDNN = ak.flatten(ak.mask(LZDF["cscRechitClusterDNN_bkgMC_plusBeamHalo"], LZDF["cscRechitClusterDNN_bkgMC_plusBeamHalo"]>0))
    newDNN = newDNN[~ak.is_none(newDNN)]
    new_df = ak.with_field(new_df, newDNN, "cscRechitClusterDNN_bkgMC_plusBeamHalo")
    
    for branch in branch_names:
        if (not isMC) and (branch in ["weight", "pileupWeight"]):
            continue
        new_df = ak.with_field(new_df, np.repeat(LZDF[branch],LZDF["nCscRechitClusters"]), branch)

    
    column_indices_probe = np.array(ak.flatten(ak.values_astype(LZDF["cscRechitCluster_matchToMuon2"], int)))
    column_indices_tag = np.array(ak.flatten(ak.values_astype(LZDF["cscRechitCluster_matchToMuon1"], int)))
    row_indices = np.arange(np.size(column_indices_probe), dtype=int)
    
    #compute 
    if isMC:
        MC_SF_LooseID = getMuonScaleFactor.getLooseIDEffArr_preBPix(np.array(LZDF.lepPt), np.array(LZDF.lepEta))
        MC_SF_LooseISO = getMuonScaleFactor.getLooseISOEffArr_preBPix(np.array(LZDF.lepPt), np.array(LZDF.lepEta))
        MC_SF_TightID = getMuonScaleFactor.getTightIDEffArr_preBPix(np.array(LZDF.lepPt), np.array(LZDF.lepEta))
        MC_SF_TightISO = getMuonScaleFactor.getTightISOEffArr_preBPix(np.array(LZDF.lepPt), np.array(LZDF.lepEta))
        MC_SF_HLT = getMuonScaleFactor.getHLTEffArr_preBPix(np.array(LZDF.lepPt), np.array(LZDF.lepEta))
        
        MC_SF_LooseID = np.repeat(MC_SF_LooseID,np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_probe]
        MC_SF_LooseISO = np.repeat(np.array(MC_SF_LooseISO),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_probe]
        MC_SF_TightID = np.repeat(MC_SF_TightID,np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
        MC_SF_TightISO = np.repeat(MC_SF_TightISO,np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
        MC_SF_HLT = np.repeat(MC_SF_HLT,np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
        
        MC_Weight_Total = new_df["weight"]*new_df["pileupWeight"]*MC_SF_LooseID*MC_SF_LooseISO*MC_SF_TightID*MC_SF_TightISO*MC_SF_HLT*MC_kFactors[campaign]
        new_df = ak.with_field(new_df, MC_Weight_Total, "weight_total")
    
    print("at muon variables")
    
#     #load pT, eta, and phi for tag and probe muons
#     probe_pT = np.repeat(np.array(LZDF["lepPt"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_probe]
#     probe_eta = np.repeat(np.array(LZDF["lepEta"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_probe]
#     probe_phi = np.repeat(np.array(LZDF["lepPhi"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_probe]
    
#     tag_pT = np.repeat(np.array(LZDF["lepPt"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
#     tag_eta = np.repeat(np.array(LZDF["lepEta"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
#     tag_phi = np.repeat(np.array(LZDF["lepPhi"]),np.array(LZDF["nCscRechitClusters"]), axis=0)[row_indices,column_indices_tag]
    
#     new_df = ak.with_field(new_df, probe_pT, "probe_pT")
#     new_df = ak.with_field(new_df, probe_eta, "probe_eta")
#     new_df = ak.with_field(new_df, probe_phi, "probe_phi")
    
#     new_df = ak.with_field(new_df, tag_pT, "tag_pT")
#     new_df = ak.with_field(new_df, tag_eta, "tag_eta")
#     new_df = ak.with_field(new_df, tag_phi, "tag_phi")
    
#     #deltaR(cluster, muon)
#     new_df = ak.with_field(new_df, np.sqrt((new_df["cscRechitClusterEta"]-new_df["probe_eta"])**2+(new_df["cscRechitClusterPhi"]-new_df["probe_phi"])**2), "cscRechitClusterMuonDeltaR")
    
#     #DNN inputs - hit fractions in stations/rings
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus11+new_df.cscRechitClusterNRechitChamberMinus11+new_df.cscRechitClusterNRechitChamberPlus12+new_df.cscRechitClusterNRechitChamberMinus12+new_df.cscRechitClusterNRechitChamberPlus13+new_df.cscRechitClusterNRechitChamberMinus13)/new_df.cscRechitClusterSize, "cscRechitClusterFracS1")
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus21+new_df.cscRechitClusterNRechitChamberMinus21+new_df.cscRechitClusterNRechitChamberPlus22+new_df.cscRechitClusterNRechitChamberMinus22)/new_df.cscRechitClusterSize, "cscRechitClusterFracS2")
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus31+new_df.cscRechitClusterNRechitChamberMinus31+new_df.cscRechitClusterNRechitChamberPlus32+new_df.cscRechitClusterNRechitChamberMinus32)/new_df.cscRechitClusterSize, "cscRechitClusterFracS3")
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus41+new_df.cscRechitClusterNRechitChamberMinus41+new_df.cscRechitClusterNRechitChamberPlus42+new_df.cscRechitClusterNRechitChamberMinus42)/new_df.cscRechitClusterSize, "cscRechitClusterFracS4")

#     new_df = ak.with_field(new_df,(new_df.cscRechitClusterNRechitChamberPlus11+new_df.cscRechitClusterNRechitChamberMinus11+new_df.cscRechitClusterNRechitChamberPlus21+new_df.cscRechitClusterNRechitChamberMinus21+new_df.cscRechitClusterNRechitChamberPlus31+new_df.cscRechitClusterNRechitChamberMinus31+new_df.cscRechitClusterNRechitChamberPlus41+new_df.cscRechitClusterNRechitChamberMinus41)/new_df.cscRechitClusterSize, "cscRechitClusterFracR1")
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus12+new_df.cscRechitClusterNRechitChamberMinus12+new_df.cscRechitClusterNRechitChamberPlus22+new_df.cscRechitClusterNRechitChamberMinus22+new_df.cscRechitClusterNRechitChamberPlus32+new_df.cscRechitClusterNRechitChamberMinus32+new_df.cscRechitClusterNRechitChamberPlus42+new_df.cscRechitClusterNRechitChamberMinus42)/new_df.cscRechitClusterSize, "cscRechitClusterFracR2")
#     new_df = ak.with_field(new_df, (new_df.cscRechitClusterNRechitChamberPlus13+new_df.cscRechitClusterNRechitChamberMinus13)/new_df.cscRechitClusterSize, "cscRechitClusterFracR3")
    
    #forward hits branch
    new_df = ak.with_field(new_df, new_df.cscRechitClusterNRechitChamberPlus11+new_df.cscRechitClusterNRechitChamberMinus11+new_df.cscRechitClusterNRechitChamberPlus12 + new_df.cscRechitClusterNRechitChamberMinus12, "forward_hits")
    return new_df

In [9]:
events_MC_dict = {}; events_data_dict = {}
for campaign in list(events_MC_full_dict.keys()):
    print(campaign)
    print("MC")
    events_MC = getClusterBranches(events_MC_full_dict[campaign], campaign, True)
    print("now data")
    events_data = getClusterBranches(events_data_full_dict[campaign], campaign, False)
    events_MC_dict[campaign] = events_MC
    events_data_dict[campaign] = events_data

2022EE
MC
finished csc branches
at muon variables
now data
finished csc branches
at muon variables


### Code to Mask Data According to Specific Cuts - Low MET and High MET, along with cutflows

In [10]:
def makeForwardVetoMask(events, mask, forwardVetoList: list=[]):
    forwardMask = mask
    if "forward_veto" in forwardVetoList:
        forwardMask = ak.mask(forwardMask, events.forward_hits==0)
    if "forward_veto_mod" in forwardVetoList:
        forwardMask = ak.mask(forwardMask, events.forward_max_chamber==False)
    if "forward_veto_highMET" in forwardVetoList:
        forwardMask = ak.mask(forwardMask, (events.cscRechitClusterNRechitChamberPlus11+events.cscRechitClusterNRechitChamberMinus11)/events.cscRechitClusterSize<1)
    return forwardMask

In [11]:
def makeEventMask(events, noMaskList: list=[], forwardVetoMaskList: list=[], noCuts=False):
    mask = events.cscRechitCluster_matchToProbeMuon
    mask = makeForwardVetoMask(events, mask, forwardVetoMaskList)
    #mask out hotspot automatically
    mask=ak.mask(mask, np.logical_or(np.logical_and(np.logical_or(events.cscRechitClusterPhi<-0.3,events.cscRechitClusterPhi>0.6),abs(events.cscRechitClusterPhi)<2.8), events.cscRechitClusterEta>-1.9))
    if noCuts:
        return mask
    if "timespread_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterTimeSpreadWeightedAll<20)
    if "time_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterTimeWeighted<12.5)
        mask = ak.mask(mask, events.cscRechitClusterTimeWeighted>-5)
    if "DNN_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterDNN_bkgMC_plusBeamHalo>0.96)
    if "clusterSize_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterSize>160)
    if "NStation10_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterNStation10>1)
    return mask

In [12]:
def makeEventMaskHighMET(events, noMaskList: list=[], forwardVetoMaskList: list=[], noCuts=False):
    mask = events.cscRechitCluster_matchToProbeMuon
    mask = makeForwardVetoMask(events, mask, forwardVetoMaskList)
    #mask out hotspot automatically
    mask=ak.mask(mask, np.logical_or(np.logical_and(np.logical_or(events.cscRechitClusterPhi<-0.3,events.cscRechitClusterPhi>0.6),abs(events.cscRechitClusterPhi)<2.8), events.cscRechitClusterEta>-1.9))
    if noCuts:
        return mask
    if "timespread_veto" not in noMaskList: #not actually applied in the analysis
        mask = ak.mask(mask, events.cscRechitClusterTimeSpreadWeightedAll<20)
    if "time_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterTimeWeighted<12.5)
        mask = ak.mask(mask, events.cscRechitClusterTimeWeighted>-5)
    if "DNN_veto" not in noMaskList: # not actually applied in the analysis
        mask = ak.mask(mask, events.cscRechitClusterDNN_bkgMC_plusBeamHalo>0.96)
    if "clusterSize_veto" not in noMaskList:
        mask = ak.mask(mask, events.cscRechitClusterSize>150) #150 instead of 160 for low MET
    return mask

#### compute efficiencies (no cuts applied other than forward veto or high MET equivalent, except for measurement of forward veto efficiency itself)


In [13]:
# data efficiencies

for campaign in list(events_data_dict.keys()):
    print("###################################")
    print("###################################")
    print(f"Computing Efficiencies for {campaign}")
    events_data = events_data_dict[campaign]
    print("computing low MET efficiencies in Data")

    denom = ak.count_nonzero(makeEventMask(events_data, [], [], True))
    print("Data Denominator: ", denom)

    num_forward = ak.count_nonzero(makeEventMask(events_data, [], ["forward_veto"], True))
    print("Forward Veto Efficiency: ", num_forward/denom*100)

    num_forward_mod = ak.count_nonzero(makeEventMask(events_data, [], ["forward_veto_mod"], True))
    print("Modified Forward Veto Efficiency: ", num_forward_mod/denom*100)

    print("clusters passing new forward veto: ", num_forward_mod)

    num_timespread = ak.count_nonzero(makeEventMask(events_data, ['clusterSize_veto','DNN_veto','time_veto', "NStation10_veto"],["forward_veto_mod"]))
    print("Timespread Veto Efficiency: ", num_timespread/num_forward_mod*100)

    num_time = ak.count_nonzero(makeEventMask(events_data, ['clusterSize_veto','DNN_veto','timespread_veto', "NStation10_veto"],["forward_veto_mod"]))
    print("Time Veto Efficiency: ", num_time/num_forward_mod*100)

    num_DNN = ak.count_nonzero(makeEventMask(events_data, ['timespread_veto','clusterSize_veto','time_veto', "NStation10_veto"],["forward_veto_mod"]))
    print("DNN Veto Efficiency: ", num_DNN/num_forward_mod*100)

    num_clusterSize = ak.count_nonzero(makeEventMask(events_data, ['timespread_veto','DNN_veto','time_veto', "NStation10_veto"],["forward_veto_mod"]))
    print("ClusterSize Veto Efficiency: ", num_clusterSize/num_forward_mod*100)

    num_NStation10 = ak.count_nonzero(makeEventMask(events_data, ['timespread_veto','DNN_veto','time_veto', 'clusterSize_veto'],["forward_veto_mod"]))
    print("NStation10 Veto Efficiency: ", num_NStation10/num_forward_mod*100)

###################################
###################################
Computing Efficiencies for 2022EE
computing low MET efficiencies in Data
Data Denominator:  2835731
Forward Veto Efficiency:  1.8570167621682028
Modified Forward Veto Efficiency:  66.37135186659101
clusters passing new forward veto:  1882113
Timespread Veto Efficiency:  36.366573101615046
Time Veto Efficiency:  89.91070142972288
DNN Veto Efficiency:  23.71095678102218
ClusterSize Veto Efficiency:  5.093317988877395
NStation10 Veto Efficiency:  85.46484722224436


In [14]:
# MC efficiencies

for campaign in list(events_MC_dict.keys()):
    print("###################################")
    print("###################################")
    print(f"Computing Efficiencies for {campaign}")
    events_MC = events_MC_dict[campaign]
    print("computing low MET efficiencies in MC")

    denom = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, [], [], True)))
    print("MC Denominator: ", denom)

    num_forward = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, [], ["forward_veto"], True)))
    print("Forward Veto Efficiency: ", num_forward/denom*100)

    num_forward_mod = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, [], ["forward_veto_mod"], True)))
    print("Modified Forward Veto Efficiency: ", num_forward_mod/denom*100)
    
    print("clusters passing new forward veto: ", num_forward_mod)

    num_timespread = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, ['clusterSize_veto','DNN_veto','time_veto', "NStation10_veto"],["forward_veto_mod"])))
    print("Timespread Veto Efficiency: ", num_timespread/num_forward_mod*100)

    num_time = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, ['clusterSize_veto','DNN_veto','timespread_veto', "NStation10_veto"],["forward_veto_mod"])))
    print("Time Veto Efficiency: ", num_time/num_forward_mod*100)

    num_DNN = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, ['timespread_veto','clusterSize_veto','time_veto', "NStation10_veto"],["forward_veto_mod"])))
    print("DNN Veto Efficiency: ", num_DNN/num_forward_mod*100)

    num_clusterSize = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, ['timespread_veto','DNN_veto','time_veto', "NStation10_veto"],["forward_veto_mod"])))
    print("ClusterSize Veto Efficiency: ", num_clusterSize/num_forward_mod*100)

    num_NStation10 = ak.sum(ak.mask(events_MC.weight_total, makeEventMask(events_MC, ['timespread_veto','DNN_veto','time_veto', 'clusterSize_veto'],["forward_veto_mod"])))
    print("NStation10 Veto Efficiency: ", num_NStation10/num_forward_mod*100)

###################################
###################################
Computing Efficiencies for 2022EE
computing low MET efficiencies in MC
MC Denominator:  1744175.9760305993
Forward Veto Efficiency:  3.232207753951318
Modified Forward Veto Efficiency:  70.05398887530895
clusters passing new forward veto:  1221864.8442142874
Timespread Veto Efficiency:  81.90520820348655
Time Veto Efficiency:  94.08350316798364
DNN Veto Efficiency:  21.908502648559704
ClusterSize Veto Efficiency:  6.051809428316391
NStation10 Veto Efficiency:  70.97186548816615


In [15]:

for campaign in list(events_MC_dict.keys()):
    print("###################################")
    print("###################################")
    print(f"Computing Efficiencies for {campaign}")
    events_data = events_data_dict[campaign]
    
    # data efficiencies
    print("computing high MET efficiencies in Data")

    denom_noForward = ak.count_nonzero(makeEventMaskHighMET(events_data, [], [], True))
    print("Data Denominator: ", denom_noForward)


    num_forward = ak.count_nonzero(makeEventMaskHighMET(events_data, [], ["forward_veto_highMET"], True))
    print("Forward Veto Efficiency: ", num_forward/denom_noForward*100)

    num_timespread = ak.count_nonzero(makeEventMaskHighMET(events_data, ['clusterSize_veto','DNN_veto','time_veto']))
    print("Timespread Veto Efficiency: ", num_timespread/denom*100)

    num_time = ak.count_nonzero(makeEventMaskHighMET(events_data, ['clusterSize_veto','DNN_veto','timespread_veto']))
    print("Time Veto Efficiency: ", num_time/denom*100)

    num_DNN = ak.count_nonzero(makeEventMaskHighMET(events_data, ['timespread_veto','clusterSize_veto','time_veto']))
    print("DNN Veto Efficiency: ", num_DNN/denom*100)


    num_clusterSize = ak.count_nonzero(makeEventMaskHighMET(events_data, ['timespread_veto','DNN_veto','time_veto']))
    print("ClusterSize Veto Efficiency: ", num_clusterSize/denom*100)

###################################
###################################
Computing Efficiencies for 2022EE
computing high MET efficiencies in Data
Data Denominator:  2835731
Forward Veto Efficiency:  99.99583881545887
Timespread Veto Efficiency:  56.0893518454721
Time Veto Efficiency:  144.19737655851515
DNN Veto Efficiency:  43.3902318573572
ClusterSize Veto Efficiency:  8.469099565066394


In [16]:


for campaign in list(events_MC_dict.keys()):
    print("###################################")
    print("###################################")
    print(f"Computing Efficiencies for {campaign}")
    events_MC = events_MC_dict[campaign]

    print("computing high MET efficiencies in MC")


    denom = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC, [], [], True)))
    print("MC Denominator: ", denom)


    num_forward = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC,[],["forward_veto_highMET"], True)))
    print("Forward Veto Efficiency: ", num_forward/denom*100)

    num_timespread = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC,['clusterSize_veto','DNN_veto','time_veto'])))
    print("Timespread Veto Efficiency: ", num_timespread/denom*100)

    num_time = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC,['timespread_veto','clusterSize_veto','DNN_veto'])))
    print("Time Veto Efficiency: ", num_time/denom*100)

    num_DNN = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC,['timespread_veto','clusterSize_veto','time_veto'])))
    print("DNN Veto Efficiency: ", num_DNN/denom*100)

    num_clusterSize = ak.sum(ak.mask(events_MC.weight_total, makeEventMaskHighMET(events_MC, ['timespread_veto','DNN_veto','time_veto'])))
    print("ClusterSize Veto Efficiency: ", num_clusterSize/denom*100)

###################################
###################################
Computing Efficiencies for 2022EE
computing high MET efficiencies in MC
MC Denominator:  1744175.9760305993
Forward Veto Efficiency:  100.0
Timespread Veto Efficiency:  79.37065781417553
Time Veto Efficiency:  92.82720015922428
DNN Veto Efficiency:  25.502123578333073
ClusterSize Veto Efficiency:  6.478298096229606


### Helper functions to make histograms and style them appropriately

In [17]:
rt.gStyle.SetOptStat(0)
def make_ratio_plot(h_list_in, title = "", label = "", fit = False, in_tags = None, ratio_bounds = [0.1, 4], logy = False, ratio_index = 0, draw_opt = ['E2','E1'], text = "", scale=False, scales = [1,1]):
    h_list = []
    if in_tags == None:
        tag = []
    else:
        tag = in_tags
    for i, h in enumerate(h_list_in):
        h_list.append(h.Clone('h{}aux{}'.format(i, label)))
        if in_tags == None:
            tag.append(h.GetTitle())
    #print("tags: ", tag)
    c_out = rt.TCanvas("c_out_ratio"+label, "c_out_ratio"+label, 800, 800)
    pad1 = rt.TPad("pad1", "pad1", 0, 0.3, 1, 1.0)
    pad1.SetBottomMargin(0.03)
    pad1.SetLeftMargin(0.15)
    pad1.SetRightMargin(0.04)# pad2.SetGrid()
    if logy:
        pad1.SetLogy()

    pad1.Draw()
    pad1.cd()

    leg = rt.TLegend(0.5, 0.65, 0.9, 0.92)
    leg = rt.TLegend(0.7, 0.65, 0.9, 0.92)

    #leg = rt.TLegend(0.2, 0.7, 0.5, 0.9)
    # leg = rt.TLegend(0.7, 0.2, 0.9, 0.4)
    leg.SetBorderSize(0)
    leg.SetTextSize(0.045)
    leg.SetFillStyle(0)
    c_out.cd(1)

    scaled_h_list = []
    if scale:
        for i, h_unscaled in enumerate(h_list):
            #h = h_unscaled.Clone()
            #h = h_unscaled.Scale(1/scales[i])
            #scaled_h_list.append(h_unscaled.Clone())
            h_unscaled.Scale(1/scales[i])
            scaled_h_list.append(h_unscaled)
    else:
        #for i, h_unscaled in enumerate(h_list):
            #h = h_unscaled.Clone()
            #scaled_h_list.append(h)
        scaled_h_list = h_list
    for i, h in enumerate(scaled_h_list):
        h.GetXaxis().SetLabelSize(0)
        h.GetXaxis().SetTitle(label)
        h.GetYaxis().SetRangeUser(0, 1.1*max(map(lambda x: x.GetMaximum(), scaled_h_list)))
        if logy and not scale:
            h.GetYaxis().SetRangeUser(10e-2, 2*max(map(lambda x: x.GetMaximum(), scaled_h_list)))
        if logy and scale:
            h.GetYaxis().SetRangeUser(10e-4, 1)
        h.GetYaxis().SetTitleOffset(1.0)
        h.GetYaxis().SetTitleSize(0.06)
        h.GetYaxis().SetLabelSize(0.05)
        
        if scale:
            y_title = "Fraction of Events"
        else:
            y_title = "Events"
        
        h.GetYaxis().SetTitle()
        h.SetTitle(f"{title};adsf;{y_title}")
        #if ratio_index == 0:h.DrawCopy("hist")
        '''
        h.SetFillColor(h_list_in[i].GetLineColor())
        h.SetFillStyle(3002)
        #h.SetStats(1)
        h.SetLineColor(h_list_in[i].GetLineColor())
        h.SetLineWidth(2)
        h.SetMarkerColor(h_list_in[i].GetLineColor())
        h.SetMarkerSize(2)
        # if ratio_index == 0:
        #     # h.DrawCopy("hist")
        #     h.DrawCopy(draw_opt[i]+'same')
        # else:h.DrawCopy(draw_opt[i])
        #if ratio_index == 0 :h.DrawCopy(draw_opt[i]+"same")
        #h.DrawCopy("E2 HIST")
        '''
        if i==0:
            h.SetLineWidth(4)
            h.DrawCopy("hist")
            #h.SetFillStyle(0)
            h.SetFillColor(h_list_in[i].GetLineColor())
            h.SetFillStyle(3002)
            #h.SetStats(1)
            h.SetLineColor(h_list_in[i].GetLineColor())
            h.SetLineWidth(2)
            h.SetMarkerColor(h_list_in[i].GetLineColor())
            h.SetMarkerSize(2)
            h.DrawCopy(draw_opt[i] + "same")
            #h.SetFillStyle(0)
        else:
            h.SetLineWidth(2)
            h.DrawCopy(draw_opt[i] + "same")
        #else:h.DrawCopy(draw_opt[i])
        if len(text)>0:
            l = rt.TLatex()
            l.SetTextSize(0.045)
            if logy:l.DrawLatex((h.GetXaxis().GetXmax()-h.GetXaxis().GetXmin())*0.1+h.GetXaxis().GetXmin() , h.GetMaximum()/10, text)
            else:l.DrawLatex((h.GetXaxis().GetXmax()-h.GetXaxis().GetXmin())*0.1+h.GetXaxis().GetXmin() , h.GetMaximum()*0.8, text)
        #if i==1:
            #h.DrawCopy(draw_opt[i]+"same")
       #     h.Draw("E1 same")

        leg.AddEntry(h, tag[i], "lep")
    leg.Draw("same")
    cmsText = rt.TLatex()

    cmsText.SetNDC(True)

    cmsText.SetTextFont(42);  
    cmsText.SetTextSize(0.045);
    cmsText.SetTextAlign(11); 
    cmsText.DrawLatex(0.17, 0.85, "#bf{CMS Work in progress}") 

    c_out.cd()
    pad2 = rt.TPad("pad2", "pad2", 0, 0, 1, 0.3)
    pad2.SetTopMargin(0.03)
    pad2.SetBottomMargin(0.25)
    pad2.SetLeftMargin(0.15)
    pad2.SetRightMargin(0.04)# pad2.SetGrid()
    pad2.Draw()
    pad2.cd()
    band = scaled_h_list[ratio_index].Clone('h_band')
    for j in range(band.GetXaxis().GetNbins()):
        band.SetBinContent(j+1, 1.0)
        if h_list[ratio_index].GetBinContent(j+1) == 0:
            band.SetBinError(j+1, 0.0)
        else:
            band.SetBinError(j+1, scaled_h_list[ratio_index].GetBinError(j+1)/scaled_h_list[ratio_index].GetBinContent(j+1))
            #print(j, h_list_in[0].GetBinError(j+1)/h_list_in[0].GetBinContent(j+1))
    band.SetFillColor(scaled_h_list[ratio_index].GetLineColor())

    band.SetFillStyle(3002)
    band.SetLineColor(scaled_h_list[ratio_index].GetLineColor())
    #band.SetFillColorAlpha(0,0)
    #band.SetLineColor(0)
    
    band.GetYaxis().SetTitleOffset(0.5)
    band.GetYaxis().SetRangeUser(ratio_bounds[0], ratio_bounds[1])
    band.GetYaxis().SetTitleSize(0.11)
    band.GetYaxis().SetLabelSize(0.12)
    band.GetYaxis().SetNdivisions(506)
    band.GetXaxis().SetTitleOffset(0.95)
    band.GetXaxis().SetTitleSize(0.12)
    band.GetXaxis().SetLabelSize(0.12)
    band.GetXaxis().SetTickSize(0.07)
    
    band.SetYTitle('Ratio with {}'.format(tag[ratio_index]))
    band.SetXTitle(label)
    band.SetTitle("")
    band.DrawCopy('E2')
    ln = rt.TLine(h.GetXaxis().GetXmin(), 1, h.GetXaxis().GetXmax(), 1)
    ln.SetLineWidth(3)
    ln.SetLineColor(scaled_h_list[ratio_index].GetLineColor())
    ln.DrawLine(h.GetXaxis().GetXmin(), 1, h.GetXaxis().GetXmax(), 1)
     
    #print(ratio_index)
    for i, h in enumerate(scaled_h_list):
        if i == ratio_index:
            continue
        else:
            if fit:h.GetFunction("expo")
            h.Divide(scaled_h_list[ratio_index])
            # h.GetYaxis().SetTitleOffset(0.6)
            # h.GetYaxis().SetRangeUser(ratio_bounds[0], ratio_bounds[1])
            # h.GetYaxis().SetTitleSize(0.12)
            # h.GetYaxis().SetLabelSize(0.12)
            # h.GetYaxis().SetNdivisions(506)
            # h.GetXaxis().SetTitleOffset(0.95)
            # h.GetXaxis().SetTitleSize(0.12)
            # h.GetXaxis().SetLabelSize(0.12)
            # h.GetXaxis().SetTickSize(0.07)
            # h.SetYTitle('Ratio with {}'.format(tag[0]))
            # h.SetTitle("")
            #set relative error of ratio to be the relative error of data
            for j in range(h.GetXaxis().GetNbins()):
                if h_list[i].GetBinContent(j+1) == 0:
                    h.SetBinError(j+1, 0.0)
                else:
                    h.SetBinError(j+1, h_list_in[i].GetBinError(j+1)/h_list_in[i].GetBinContent(j+1)*h.GetBinContent(j+1))
            h.Draw('same'+draw_opt[i])
    
    pad2.Update()
    
    c_out.pad1 = pad1
    c_out.pad2 = pad2
    c_out.h_list = h_list
    c_out.leg = leg
    
    
    return c_out

In [18]:
#helper function to build histograms

def makeHists(events_data, events_MC, branch, mask_array, bins_tuple, highMET = False):
    print(f"on branch {branch}")
    
    nbins, lowBin, highBin = bins_tuple
    
    #loop over three types of plots (no cuts, just forward veto, all cuts other than that measured)
    name_strs = ["noCuts", "forwardVeto", "modifiedForwardVeto", "allOtherCuts"]
    masks = [[], [], [], mask_array]
    masks_forwardVeto = [[],["forward_veto"],["forward_veto_mod"],["forward_veto_mod"]]
    masks_forwardVeto_highMET = [[],["forward_veto_highMET"],[], []]
    mask_bools = [True, True, True, False]
    
    hist_info = {}
    for plotType, mask_list, mask_list_forwardVeto, mask_list_forwardVeto_highMET, mask_bool in zip(name_strs, masks, masks_forwardVeto, masks_forwardVeto_highMET, mask_bools):
        #compute relevant mask for particular plot
        if plotType=="modifiedForwardVeto" and highMET:continue
        if not highMET:
            mask_data = makeEventMask(events_data, mask_list, mask_list_forwardVeto, mask_bool)
            mask_MC = makeEventMask(events_MC, mask_list, mask_list_forwardVeto, mask_bool)
        else:
            mask_data = makeEventMaskHighMET(events_data, mask_list, mask_list_forwardVeto_highMET, mask_bool)
            mask_MC = makeEventMaskHighMET(events_MC, mask_list, mask_list_forwardVeto_highMET, mask_bool)
    
        data_tree = events_data[mask_data]
        data_tree = data_tree[~ak.is_none(data_tree)]
        
        MC_tree = events_MC[mask_MC]
        MC_tree = MC_tree[~ak.is_none(MC_tree)]
    
        #initialize data and MC histograms
        data = rt.TH1F("Data", "Data", nbinsx=nbins, xlow = lowBin, xup=highBin)
        MC = rt.TH1F("MC", "MC", nbinsx=nbins, xlow = lowBin, xup=highBin)


        #build data hist
        data_arr = np.array(data_tree[branch], dtype=np.float64)
        data_size = np.size(data_arr)
        data_weights = np.ones(data_size, dtype=np.float64)
        data.FillN(data_size, data_arr, data_weights)
        data.SetLineColor(rt.kBlack)
        data.SetFillStyle(0)
        
        #build MC hist
        MC_arr = np.array(MC_tree[branch], dtype=np.float64)
        MC_size = np.size(MC_arr)
        MC_weights = np.array(MC_tree["weight_total"])
        MC.FillN(MC_size, MC_arr, MC_weights)
        MC.SetLineColor(rt.kRed)
        MC.SetFillStyle(0)
        
        sumOfWeights = ak.sum(MC_tree["weight_total"])
        
        hist_info[branch+"_"+plotType] = {"MC_hist": MC, "data_hist": data, 
                                          "MC_weights": sumOfWeights, "data_weights": data_size}
        
    return hist_info
    

In [19]:
#define dictionary with relevant plot info
plot_info = {
#             "ZMass": {"filename_base":"ZMass", "title":"Dimuon Mass Distribution", 
#                        "xlabel":"Dimuon Mass [GeV]", "masks":[],"bins":(80, 0, 150), "logy":False},
#             "puppiMet": {"filename_base":"puppiMet", "title":"PUPPI MET Distribution", 
#                        "xlabel":"PUPPI MET [GeV]", "masks":[],"bins":(60, -5, 100), "logy":False},
#             "puppiMetPhi": {"filename_base":"puppiMetPhi", "title":"PUPPI MET Phi Distribution", 
#                        "xlabel":"PUPPI MET Phi", "masks":[],"bins":(30, -4, 4), "logy":False},
#             "cscRechitClusterMuonDeltaR": {"filename_base":"cluster_muon_deltaR", "title":"deltaR(cluster, muon)", 
#                        "xlabel":"deltaR(cluster, muon)", "masks":[],"bins":(50, 0, 0.5), "logy":False},
            
#             "probe_pT": {"filename_base":"probe_pT", "title":"pT of Muon Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(25, 0, 100), "logy":False},
#             "tag_pT": {"filename_base":"tag_pT", "title":"pT of Muon Not Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(25, 0, 100), "logy":False},
#             "probe_phi": {"filename_base":"probe_phi", "title":"Phi of Muon Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(60, -4, 4), "logy":False},
#             "tag_phi": {"filename_base":"tag_phi", "title":"Phi of Muon Not Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(60, -4, 4), "logy":False},
#             "probe_eta": {"filename_base":"probe_eta", "title":"Eta of Muon Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(60, -4, 4), "logy":False},
#             "tag_eta": {"filename_base":"tag_eta", "title":"Eta of Muon Not Matched to Cluster", 
#                        "xlabel":"pT [GeV]", "masks":[],"bins":(60, -4, 4), "logy":False},
            
            "cscRechitClusterSize": {"filename_base":"cscRechitClusterSize", "title":"Cluster Size Distribution", 
                       "xlabel":"N_{hits}", "masks":["clusterSize_veto"],"bins":(40, 0, 400), "logy":True},
            "cscRechitClusterDNN_bkgMC_plusBeamHalo": {"filename_base":"DNN_Score", "title":"DNN Score Distribution", 
                       "xlabel":"DNN Score", "masks":["DNN_veto"],"bins":(12, 0.52, 1), "logy":True},
            "cscRechitClusterTimeWeighted": {"filename_base":"Cluster_Time", "title":"Weighted Cluster Time Distribution", 
                       "xlabel":"Weighted Cluster Time [ns]", "masks":["time_veto"],"bins":(60, -8, 20), "logy":False},
            "cscRechitClusterTimeSpreadWeightedAll": {"filename_base":"Cluster_Timespread", "title":"Weighted Cluster Timespread Distribution", 
                       "xlabel":"Weighted Cluster Timespread [ns]", "masks":["timespread_veto"],"bins":(60, 0, 50), "logy":False},
            "cscRechitClusterEta": {"filename_base":"cluster_eta", "title":"Cluster Eta", 
                       "xlabel":"Eta", "masks":[],"bins":(60, -4, 4), "logy":False},
            "cscRechitClusterPhi": {"filename_base":"cluster_phi", "title":"Cluster Phi", 
                       "xlabel":"Phi", "masks":[],"bins":(60, -4, 4), "logy":False},
            "cscRechitClusterNStation10": {"filename_base":"cluster_NStation10", "title":"Number of Stations with >=10 Rechits", 
                       "xlabel":"# of Stations", "masks":[],"bins":(5, 0, 5), "logy":False}
            
#             "cscRechitClusterXSpread": {"filename_base":"cscRechitClusterXSpread", "title":"Cluster X Spread", 
#                        "xlabel":"X Spread [cm]", "masks":[],"bins":(25, -5, 150), "logy": False},
#             "cscRechitClusterYSpread": {"filename_base":"cscRechitClusterYSpread", "title":"Cluster Y Spread", 
#                        "xlabel":"Y Spread [cm]", "masks":[],"bins":(25, -5, 150), "logy": False},
#             "cscRechitClusterZSpread": {"filename_base":"cscRechitClusterZSpread", "title":"Cluster Z Spread", 
#                        "xlabel":"Z Spread [cm]", "masks":[],"bins":(25, -5, 200), "logy": False},
#             "cscRechitClusterXYSpread": {"filename_base":"cscRechitClusterXYSpread", "title":"Cluster XY Spread", 
#                        "xlabel":"XY Spread [cm]", "masks":[],"bins":(25, -5, 150), "logy": False},
#             "cscRechitClusterRSpread": {"filename_base":"cscRechitClusterRSpread", "title":"Cluster R Spread", 
#                        "xlabel":"R Spread [cm]", "masks":[],"bins":(25, -5, 150), "logy": False},
#             "cscRechitClusterSkewX": {"filename_base":"cscRechitClusterSkewX", "title":"Cluster X Skew", 
#                        "xlabel":"X Skew [cm]", "masks":[],"bins":(25, -150, 150), "logy": False},
#             "cscRechitClusterSkewY": {"filename_base":"cscRechitClusterSkewY", "title":"Cluster Y Skew", 
#                        "xlabel":"Y Skew [cm]", "masks":[],"bins":(25, -150, 150), "logy": False},
#              "cscRechitClusterSkewZ": {"filename_base":"cscRechitClusterSkewZ", "title":"Cluster Z Skew", 
#                        "xlabel":"Z Skew [cm]", "masks":[],"bins":(25, -150, 150), "logy": False},
#             "cscRechitClusterSkewX": {"filename_base":"cscRechitClusterSkewX", "title":"Cluster X Spread", 
#                        "xlabel":"X Skew [cm]", "masks":[],"bins":(25, -150, 150), "logy": False},
#             "cscRechitClusterFracS1": {"filename_base":"cscRechitClusterFracS1", "title":"Fraction of Hits in Station 1", 
#                        "xlabel":"Station 1 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracS2": {"filename_base":"cscRechitClusterFracS2", "title":"Fraction of Hits in Station 2", 
#                        "xlabel":"Station 2 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracS3": {"filename_base":"cscRechitClusterFracS3", "title":"Fraction of Hits in Station 3", 
#                        "xlabel":"Station 3 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracS4": {"filename_base":"cscRechitClusterFracS4", "title":"Fraction of Hits in Station 4", 
#                        "xlabel":"Station 4 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracR1": {"filename_base":"cscRechitClusterFracR1", "title":"Fraction of Hits in Ring 1", 
#                        "xlabel":"Ring 1 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracR2": {"filename_base":"cscRechitClusterFracR2", "title":"Fraction of Hits in Ring 2", 
#                        "xlabel":"Ring 2 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False},
#             "cscRechitClusterFracR3": {"filename_base":"cscRechitClusterFracR3", "title":"Fraction of Hits in Ring 3", 
#                        "xlabel":"Ring 3 Hits/Total Hits", "masks":[],"bins":(25, 0, 1.1), "logy": False}
            
            }

In [None]:
full_individual_plot_info = {}
for campaign in list(events_MC_dict.keys()):
    print(campaign)
    events_MC = events_MC_dict[campaign]
    events_data = events_data_dict[campaign]
    individual_plot_info = {}
    for branch, info_dict in plot_info.items():
        #if branch!="cscRechitClusterSize" and branch!="cscRechitClusterDNN_bkgMC_plusBeamHalo":continue
        filename_base = info_dict["filename_base"]
        hist_dict = makeHists(events_data, events_MC, branch, info_dict["masks"], info_dict["bins"])
        for plot_hists, plot_hist_dict in hist_dict.items():
            individual_plot_info[plot_hists] = {"MC_hist": plot_hist_dict["MC_hist"], "data_hist": plot_hist_dict["data_hist"], 
                        "file_name": plot_hists, "title": info_dict["title"], "label": info_dict["xlabel"], 
                        "scales": [plot_hist_dict["MC_weights"], plot_hist_dict["data_weights"]], "logy": info_dict["logy"]}
    full_individual_plot_info[campaign] = individual_plot_info

2022EE
on branch cscRechitClusterSize


In [None]:
# full_individual_plot_info_highMET = {}
# for campaign in list(events_MC_dict.keys()):
#     print(campaign)
#     events_MC = events_MC_dict[campaign]
#     events_data = events_data_dict[campaign]
#     individual_plot_info_highMET = {}
#     for branch, info_dict in plot_info.items():
#         filename_base = info_dict["filename_base"]
#         hist_dict = makeHists(events_data, events_MC, branch, info_dict["masks"], info_dict["bins"], True)
#         for plot_hists, plot_hist_dict in hist_dict.items():
#             individual_plot_info_highMET[plot_hists] = {"MC_hist": plot_hist_dict["MC_hist"], "data_hist": plot_hist_dict["data_hist"], 
#                         "file_name": plot_hists, "title": info_dict["title"], "label": info_dict["xlabel"], 
#                         "scales": [plot_hist_dict["MC_weights"], plot_hist_dict["data_weights"]], "logy": info_dict["logy"]}
#     full_individual_plot_info_highMET[campaign] = individual_plot_info_highMET

In [None]:

for campaign, individual_plot_info in full_individual_plot_info.items():
    print(campaign)
    plot_output = f"{campaign}_Data_MC_Comp_finalzedSelections"
    os.makedirs(plot_output, exist_ok=True)

    for plot_type, plot_info_dict in individual_plot_info.items():
        print(plot_type)
        for boolScale in [True, False]:
            c = make_ratio_plot([plot_info_dict["MC_hist"], plot_info_dict["data_hist"]], title = plot_info_dict["title"], label = plot_info_dict["label"], fit = False, in_tags = None, ratio_bounds = [0.1, 4], logy = plot_info_dict["logy"], ratio_index = 0, draw_opt = ['E2','E1'], text = "", scale=boolScale, scales = plot_info_dict["scales"])
            if boolScale:
                scaleString = "_normalized"
            else:
                scaleString=""
            os.makedirs(plot_output+"/"+plot_info_dict["file_name"], exist_ok=True)
            c.SaveAs(plot_output+"/"+plot_info_dict["file_name"]+"/"+plot_info_dict["file_name"]+scaleString+".png")
        

In [None]:
# for campaign, individual_plot_info_highMET in full_individual_plot_info_highMET.items():
#     print(campaign)

#     plot_output = f"{campaign}_preBPix_Data_MC_Comp_finalzedSelections_highMET"
#     os.makedirs(plot_output, exist_ok=True)

#     for plot_type, plot_info_dict in individual_plot_info_highMET.items():
#         print(plot_type)
#         for boolScale in [True, False]:
#             c = make_ratio_plot([plot_info_dict["MC_hist"], plot_info_dict["data_hist"]], title = plot_info_dict["title"], label = plot_info_dict["label"], fit = False, in_tags = None, ratio_bounds = [0.1, 4], logy = plot_info_dict["logy"], ratio_index = 0, draw_opt = ['E2','E1'], text = "", scale=boolScale, scales = plot_info_dict["scales"])
#             if boolScale:
#                 scaleString = "_normalized"
#             else:
#                 scaleString=""
#             os.makedirs(plot_output+"/"+plot_info_dict["file_name"], exist_ok=True)
#             c.SaveAs(plot_output+"/"+plot_info_dict["file_name"]+"/"+plot_info_dict["file_name"]+scaleString+".png")


In [None]:
print(full_individual_plot_info["2023BPix"])