In [None]:
import numpy as np
import awkward as ak
import uproot
import matplotlib.pyplot as plt
import hist
import hist.dask as hda
import dask
import coffea.processor as processor
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

# Class-level switch on NanoAODSchema, set once here so it is in effect for every
# later use of the schema in this kernel.
# NOTE(review): presumably this silences the warnings emitted when a cross-reference
# branch is absent from the input file — confirm against the coffea documentation.
NanoAODSchema.warn_missing_crossrefs = False

In [None]:
import os

# Sanity check of the working directory: the returned list of entry names is
# this cell's displayed output.
os.listdir(os.curdir)

In [None]:
# Alternative (disabled): load the sample list from a JSON file instead of the
# inline dictionary defined below.
# import json

# with open("semileptonic_notebooks/samples.json", 'r') as sample_file:
#     fileset = json.load(sample_file)

# for sample in fileset:
#     print(sample)

# Inline fileset with a single sample.
# Layout: dataset name -> {'files': {file URL: tree name}, 'metadata': {...}}.
fileset = {
    'BulkGravToWW_1000': {
        'files': {
            'root://cmsxrootd.fnal.gov//store/mc/RunIISummer16NanoAODv7/BulkGravToWW_narrow_M-1000_13TeV-madgraph/NANOAODSIM/PUMoriond17_Nano02Apr2020_102X_mcRun2_asymptotic_v8-v1/100000/D4404DCB-FBF8-C640-87B0-2DA1D5139083.root': "Events",
        },
        'metadata': {
            # Boolean flag: the file above is a simulated sample (/store/mc/, NANOAODSIM).
            # It must be a real bool, not the tree name, so that checks such as
            # `is_mc is True` or `is_mc == True` behave as expected.
            'is_mc': True,
        },
    }
}

In [None]:
# Dataset key (into `fileset`) used for this interactive test.
test_dataset = 'BulkGravToWW_1000'
test_sample = fileset[test_dataset]

# Build the factory first, then materialise the events from it.
#   entry_stop=10000 -> read only entries before index 10000
#   delayed=False    -> request a non-delayed (eager) read
events_factory = NanoEventsFactory.from_root(
    test_sample['files'],
    schemaclass=NanoAODSchema,
    metadata=test_sample['metadata'],
    entry_stop=10000,
    delayed=False,
)
events = events_factory.events()

In [None]:
# Display the names of the top-level fields available on the loaded events.
events.fields

In [None]:
# NOTE(review): this cell was originally annotated "Samples from 2018...", but the
# file configured in `fileset` is a RunIISummer16 sample — confirm which
# data-taking period the thresholds below are meant for.

# Lepton collections used by the selections in this notebook.
muons = events.Muon
electrons = events.Electron

# Thresholds for the "exactly one tightly identified electron or muon" requirement.
# They are only defined here; nothing in this cell applies them.
electron_pt_threshold = 35
muon_pt_threshold = 30
pt_miss_threshold = 30

# --- Muon TightHWW selection, assembled from named per-muon masks ---
muon_abs_dxy = np.abs(muons.dxy)

# Kinematic acceptance.
muon_in_acceptance = (muons.pt > 10) & (np.abs(muons.eta) < 2.4)

# Transverse impact parameter: the cut is tighter below 20 GeV than at or above it.
muon_low_pt_dxy_ok = (muons.pt < 20) & (muon_abs_dxy < 0.01)
muon_high_pt_dxy_ok = (muons.pt >= 20) & (muon_abs_dxy < 0.02)
muon_dxy_ok = muon_low_pt_dxy_ok | muon_high_pt_dxy_ok

# Longitudinal impact parameter.
muon_dz_ok = np.abs(muons.dz) < 0.1

# Particle-flow isolation working point: tight or greater.
muon_pf_isolated = muons.pfIsoId >= 4

muons_tightHWW = muons[
    muon_in_acceptance
    & muons.tightId
    & muon_dxy_ok
    & muon_dz_ok
    & muon_pf_isolated
]

In [None]:
# Loose lepton collections.
loose_muons = muons[muons.looseId]
# NOTE(review): assumes `cutBased >= 2` corresponds to "loose or tighter" in the
# NanoAOD cut-based electron ID encoding — confirm against the branch documentation.
loose_electrons = electrons[electrons.cutBased >= 2]

# Per-event multiplicity of loose leptons with pt above 10.
loose_muon_count = ak.num(loose_muons[loose_muons.pt > 10], axis=1)
loose_electron_count = ak.num(loose_electrons[loose_electrons.pt > 10], axis=1)

# Extra-lepton veto: True for events holding exactly one loose lepton in total.
loose_lepton_total = loose_muon_count + loose_electron_count
loose_lepton_veto_mask = loose_lepton_total == 1

In [None]:
# AK8 (FatJet) kinematic selection: pt above 200 and |eta| below 2.4.
fat_jets = events.FatJet
fat_jet_in_acceptance = (fat_jets.pt > 200) & (np.abs(fat_jets.eta) < 2.4)
clean_fatJets = fat_jets[fat_jet_in_acceptance]



In [None]:
# --- AK4 jet selection and cleaning against the selected AK8 jets ---

# Kinematic cuts on AK4 jets.
clean_Jets = events.Jet[(events.Jet.pt > 30) & (np.abs(events.Jet.eta) < 4.7)]

# Pair every AK4 jet with every selected AK8 jet of the same event.
# nested=True keeps one inner list of pairs per AK4 jet, so the result is indexed
# [event][AK4 jet][AK8 jet] and can be reduced back to one flag per AK4 jet.
jets_fatjets = ak.cartesian({"x": clean_Jets, "y": clean_fatJets}, nested=True)

# Squared Delta-R of each pair. phi is periodic, so the raw difference is wrapped
# into [-pi, pi) first; otherwise two jets on either side of the +-pi boundary
# would look ~2*pi apart and an overlapping AK4 jet would be kept.
delta_eta = jets_fatjets["x"].eta - jets_fatjets["y"].eta
delta_phi = (jets_fatjets["x"].phi - jets_fatjets["y"].phi + np.pi) % (2 * np.pi) - np.pi
delta_r_squared = delta_eta**2 + delta_phi**2

# One boolean per AK4 jet: True only if the jet is farther than 0.8 from EVERY
# selected AK8 jet. Reducing over the innermost axis gives True for an empty list,
# so events without any AK8 jet keep all of their AK4 jets.
jets_iso_f = ak.all(delta_r_squared > 0.8**2, axis=-1)

# Apply the per-jet mask to the AK4 collection itself, so each surviving jet
# appears exactly once (not once per AK8 jet it happens to be isolated from).
jets = clean_Jets[jets_iso_f]

# The AK8 jets are not modified by the cleaning; `fj` is kept as a name for them.
fj = clean_fatJets

In [None]:

# Keep only events that contain at least one selected AK8 jet, so that taking the
# first jet of every remaining event is always valid.
AK8jets_candidates_mask = ak.num(clean_fatJets) >= 1
Wjets_candidates = clean_fatJets[AK8jets_candidates_mask]

# First AK8 jet of each remaining event, and its pt.
leading_W_jet = Wjets_candidates[:, 0]
leading_W_jet_pt = leading_W_jet.pt

# Histogram axes: a growable dataset category plus 80 regular pt bins on [0, 4000).
dataset_axis = hist.axis.StrCategory(
    categories=[],
    growth=True,
    name='dataset',
    label="Dataset",
)
leading_pt_axis = hist.axis.Regular(
    80,
    0,
    4000,
    name='leading_AK8_pt',
    label='Leading AK8 p_T (GeV)',
)

leading_W_pt_hist = hist.Hist(dataset_axis, leading_pt_axis)
leading_W_pt_hist.fill(dataset=test_dataset, leading_AK8_pt=leading_W_jet_pt)
leading_W_pt_hist.plot1d()

In [None]:
# Basic HDF5 export, step 1: build the table that will be written to disk.
import pandas as pd

# Convert the awkward array of leading-jet pt values to a NumPy array.
leading_W_jet_pt_np = ak.to_numpy(leading_W_jet_pt)

# Optional — wrap the values in a single-column DataFrame so the data carries a label.
leading_W_jet_pt_columns = {'leading_W_jet_pt': leading_W_jet_pt_np}
df_leading_W_jet_pt = pd.DataFrame(leading_W_jet_pt_columns)


In [None]:
# Write the DataFrame to 'leading_W_jet_pt.h5' under the key 'df'.
# mode='w' starts a fresh file, replacing any existing file of the same name.
df_leading_W_jet_pt.to_hdf('leading_W_jet_pt.h5', key='df', mode='w')


In [None]:
# Read the file back as a round-trip check; the returned object is this cell's
# displayed output. No key is passed, which relies on the file holding a single
# stored object.
pd.read_hdf('leading_W_jet_pt.h5')
