In [1]:
import sys

import uproot4 as uproot
import numpy as np
import awkward1 as ak

import gc

import torch
import torch.nn as nn

from sklearn import metrics
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import time

#from ROOT import *

In [81]:
import pandas as pd

In [4]:
def cleandataset(f, defaults):
    """Flatten the jet branches of an opened ROOT file into a cleaned per-jet table.

    The function reads ``Jet_eta``, ``Jet_pt``, every ``Events`` branch whose
    name contains ``Jet_DeepCSV``, the four ``Jet_btagDeep*`` tagger outputs and
    the two truth branches (``Jet_nBHadrons``, ``Jet_hadronFlavour``).  All
    branches are flattened from per-event to per-jet, a flavour label is
    derived, and invalid or missing entries are replaced by per-column defaults.

    Parameters
    ----------
    f : mapping-like file handle (e.g. the object returned by ``uproot.open``)
        ``f['Events']`` must provide ``keys()``, ``iterate()`` and ``num_entries``.
    defaults : sequence of float, indexed by column position
        ``defaults[j]`` is written into column ``j`` wherever the value is NaN,
        +/-inf or -999, and wherever the track-/vertex-count rules below apply.
        Columns with no entry in ``defaults`` are left untouched by the
        NaN/inf/-999 pass.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with one row per jet and 68 columns: columns
        0-66 of the flattened table followed by column 73, which holds the
        label (0 = b, 1 = bb, 2 = c, 3 = udsg) when exactly 73 branches were
        read.

    Notes
    -----
    All column numbers below are hard-coded and therefore only valid when the
    file exposes the branches in the order this code was written for
    (NOTE(review): confirm against the branch order of the actual input file).
    """
    events = f['Events']

    # Input features: Jet_eta, Jet_pt and every branch whose name contains 'Jet_DeepCSV'.
    feature_names = [k for k in events.keys() if k in ('Jet_eta', 'Jet_pt') or 'Jet_DeepCSV' in k]
    # Tagger outputs to compare with later, and the variables used to build the truth label.
    feature_names.extend(('Jet_btagDeepB_b', 'Jet_btagDeepB_bb', 'Jet_btagDeepC', 'Jet_btagDeepL'))
    feature_names.extend(('Jet_nBHadrons', 'Jet_hadronFlavour'))

    # step_size equals the number of entries, so the loop is left after the first chunk.
    for data in events.iterate(feature_names, step_size=events.num_entries, library='ak'):
        break

    print(events.num_entries)

    # One row per branch plus one extra row for the label; flatten per-event -> per-jet.
    n_features = len(feature_names)
    n_jets = len(ak.flatten(data['Jet_pt'], axis=1))
    datacolumns = np.zeros((n_features + 1, n_jets))
    for featureindex, name in enumerate(feature_names):
        datacolumns[featureindex] = ak.to_numpy(ak.flatten(data[name], axis=1))

    nbhad = ak.to_numpy(ak.flatten(data['Jet_nBHadrons'], axis=1))
    hadflav = ak.to_numpy(ak.flatten(data['Jet_hadronFlavour'], axis=1))

    target_class = np.full_like(hadflav, 3)                                       # udsg
    target_class = np.where(hadflav == 4, 2, target_class)                        # c
    target_class = np.where((hadflav == 5) & (nbhad > 1), 1, target_class)        # bb
    target_class = np.where((hadflav == 5) & (nbhad <= 1), 0, target_class)       # b, lepb
    datacolumns[n_features] = target_class

    # Rows become jets, columns become variables (this is a writable view of datacolumns).
    alldata = datacolumns.transpose()

    # Move NaN, +/-inf and pre-existing -999 placeholders of each column into that
    # column's default bin.  Two fixes compared with the previous version:
    #   * NaN is detected with np.isfinite (a comparison `== np.nan` is never True);
    #   * only column j of the affected rows is overwritten, not the whole row.
    # Columns beyond len(defaults) have no default and are skipped instead of
    # raising IndexError (NOTE(review): assumes `defaults` covers the leading input columns).
    n_cleanable = min(alldata.shape[1], len(defaults))
    for j in range(n_cleanable):
        invalid = ~np.isfinite(alldata[:, j]) | (alldata[:, j] == -999)
        alldata[invalid, j] = defaults[j]

    # Selections that were tried earlier and are currently disabled:
    #   * columns 67-70 restricted to the interval [0, 1]
    #   * at least one of columns 63 / 64 greater than 0

    # Per-track variables: the k-th group of columns is reset to its default when
    # the count in column 64 is <= k.
    # NOTE(review): column 64 is presumably the number of selected tracks — confirm.
    track_defaults = (
        (0, (6, 12, 22, 29, 35, 42, 50)),
        (1, (7, 13, 23, 30, 36, 43, 51)),
        (2, (8, 14, 24, 31, 37, 44, 52)),
        (3, (9, 15, 25, 32, 38, 45, 53)),
        (4, (10, 16, 26, 33, 39, 46, 54)),
        (5, (11, 17, 27, 34, 40, 47, 55)),
    )
    for max_count, columns in track_defaults:
        too_few = alldata[:, 64] <= max_count
        for column in columns:
            alldata[too_few, column] = defaults[column]

    # Columns 18-21 follow the same rule, driven by the count stored in column 65.
    for max_count, column in enumerate((18, 19, 20, 21)):
        alldata[alldata[:, 65] <= max_count, column] = defaults[column]

    # For these columns a value of -1 is treated as a placeholder and replaced as well.
    for column in (41, 48, 49, 56):
        alldata[alldata[:, column] == -1, column] = defaults[column]

    # Keep columns 0-66 and the label column 73.
    # The slice alldata[:, 67:71] (DeepCSV_dataset) is intentionally not returned.
    datacls = list(range(0, 67))
    datacls.append(73)
    return alldata[:, datacls]

In [5]:
# Load the stored minima array from an absolute, site-specific path.
# NOTE(review): presumably one minimum per input variable, indexed by column — confirm.
minima = np.load('/nfs/dust/cms/user/anstein/additional_files/default_value_studies_minima.npy')
# Each default sits 0.001 below the corresponding stored minimum.
defaults = minima - 0.001

## New PostProcessed file
### Working 2021-04-30 

In [92]:
# Absolute path of the post-processed ROOT file inspected in the cells below.
pathPostProc = "/pnfs/desy.de/cms/tier2/store/user/andrey/VHccPostProcV15_NanoV7/2017/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/PostProc_V15_Mar2021_coli-Nano_115/210327_230455/0000/tree_1.root"

In [93]:
# Open the post-processed file; the handle is reused by the inspection cells below.
filePostProc = uproot.open(pathPostProc)

In [16]:
# List the top-level objects stored in the post-processed file.
filePostProc.keys()

['autoPU;1',
 'Events;1',
 'Runs;1',
 'ParameterSets;1',
 'LuminosityBlocks;1',
 'MetaData;1',
 'untagged;1']

In [21]:
# Events branches of the post-processed file whose name contains "deepcsv"
# (case-sensitive substring match).
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "deepcsv" in branch
]
availableJetKeys

['Jet_btagSF_deepcsv_M_down',
 'Jet_btagSF_deepcsv_M',
 'Jet_btagSF_deepcsv_M_up',
 'Jet_btagSF_deepcsv_shape_down_hf',
 'Jet_btagSF_deepcsv_shape',
 'Jet_btagSF_deepcsv_shape_up_cferr1',
 'Jet_btagSF_deepcsv_shape_up_jes',
 'Jet_btagSF_deepcsv_shape_down_cferr2',
 'Jet_btagSF_deepcsv_shape_up_lf',
 'Jet_btagSF_deepcsv_shape_down_lf',
 'Jet_btagSF_deepcsv_shape_down_cferr1',
 'Jet_btagSF_deepcsv_shape_up_lfstats1',
 'Jet_btagSF_deepcsv_shape_up_lfstats2',
 'Jet_btagSF_deepcsv_shape_up_hfstats1',
 'Jet_btagSF_deepcsv_shape_up_hfstats2',
 'Jet_btagSF_deepcsv_shape_down_lfstats2',
 'Jet_btagSF_deepcsv_shape_up_hf',
 'Jet_btagSF_deepcsv_shape_down_lfstats1',
 'Jet_btagSF_deepcsv_shape_down_jes',
 'Jet_btagSF_deepcsv_shape_down_hfstats2',
 'Jet_btagSF_deepcsv_shape_down_hfstats1',
 'Jet_btagSF_deepcsv_shape_up_cferr2']

In [35]:
# Events branches of the post-processed file whose name contains "deep"
# (case-sensitive substring match).
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "deep" in branch
]
availableJetKeys

['FatJet_deepTagMD_H4qvsQCD',
 'FatJet_deepTagMD_HbbvsQCD',
 'FatJet_deepTagMD_TvsQCD',
 'FatJet_deepTagMD_WvsQCD',
 'FatJet_deepTagMD_ZHbbvsQCD',
 'FatJet_deepTagMD_ZHccvsQCD',
 'FatJet_deepTagMD_ZbbvsQCD',
 'FatJet_deepTagMD_ZvsQCD',
 'FatJet_deepTagMD_bbvsLight',
 'FatJet_deepTagMD_ccvsLight',
 'FatJet_deepTag_H',
 'FatJet_deepTag_QCD',
 'FatJet_deepTag_QCDothers',
 'FatJet_deepTag_TvsQCD',
 'FatJet_deepTag_WvsQCD',
 'FatJet_deepTag_ZvsQCD',
 'Jet_btagSF_deepcsv_M_down',
 'Jet_btagSF_deepcsv_M',
 'Jet_btagSF_deepcsv_M_up',
 'Jet_btagSF_deepcsv_shape_down_hf',
 'Jet_btagSF_deepcsv_shape',
 'Jet_btagSF_deepcsv_shape_up_cferr1',
 'Jet_btagSF_deepcsv_shape_up_jes',
 'Jet_btagSF_deepcsv_shape_down_cferr2',
 'Jet_btagSF_deepcsv_shape_up_lf',
 'Jet_btagSF_deepcsv_shape_down_lf',
 'Jet_btagSF_deepcsv_shape_down_cferr1',
 'Jet_btagSF_deepcsv_shape_up_lfstats1',
 'Jet_btagSF_deepcsv_shape_up_lfstats2',
 'Jet_btagSF_deepcsv_shape_up_hfstats1',
 'Jet_btagSF_deepcsv_shape_up_hfstats2',
 'Jet_bta

In [33]:
# Events branches of the post-processed file whose name contains "CSV".
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "CSV" in branch
]
availableJetKeys

['AK15PuppiSubJet_btagCSVV2',
 'AK15Puppi_btagCSVV2',
 'btagWeight_CSVV2',
 'btagWeight_DeepCSVB',
 'FatJet_btagCSVV2',
 'Jet_btagCSVV2',
 'SubJet_btagCSVV2',
 'HLT_PFMET100_PFMHT100_IDTight_CaloBTagCSV_3p1',
 'HLT_PFMET110_PFMHT110_IDTight_CaloBTagCSV_3p1',
 'HLT_PFMET120_PFMHT120_IDTight_CaloBTagCSV_3p1',
 'HLT_PFMET130_PFMHT130_IDTight_CaloBTagCSV_3p1',
 'HLT_PFMET140_PFMHT140_IDTight_CaloBTagCSV_3p1',
 'HLT_Ele15_IsoVVVL_PFHT450_CaloBTagCSV_4p5',
 'HLT_Mu15_IsoVVVL_PFHT450_CaloBTagCSV_4p5']

In [30]:
# Events branches of the post-processed file whose name contains "Jet_D".
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "Jet_D" in branch
]
availableJetKeys

['Jet_DeepFlavCvsL', 'Jet_DeepFlavCvsB']

In [31]:
# Events branches of the post-processed file whose name contains "Jet_d".
# This is a substring test, so names such as "FatJet_d..." match as well.
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "Jet_d" in branch
]
availableJetKeys

['FatJet_deepTagMD_H4qvsQCD',
 'FatJet_deepTagMD_HbbvsQCD',
 'FatJet_deepTagMD_TvsQCD',
 'FatJet_deepTagMD_WvsQCD',
 'FatJet_deepTagMD_ZHbbvsQCD',
 'FatJet_deepTagMD_ZHccvsQCD',
 'FatJet_deepTagMD_ZbbvsQCD',
 'FatJet_deepTagMD_ZvsQCD',
 'FatJet_deepTagMD_bbvsLight',
 'FatJet_deepTagMD_ccvsLight',
 'FatJet_deepTag_H',
 'FatJet_deepTag_QCD',
 'FatJet_deepTag_QCDothers',
 'FatJet_deepTag_TvsQCD',
 'FatJet_deepTag_WvsQCD',
 'FatJet_deepTag_ZvsQCD']

In [28]:
# Events branches of the post-processed file whose name contains "Jet_".
# This is a substring test, so "FatJet_", "SubJet_" and similar prefixes match too.
availableJetKeys = [
    branch
    for branch in filePostProc['Events'].keys()
    if "Jet_" in branch
]
availableJetKeys

['AK15PuppiSubJet_area',
 'AK15PuppiSubJet_btagCSVV2',
 'AK15PuppiSubJet_btagDeepB',
 'AK15PuppiSubJet_btagJP',
 'AK15PuppiSubJet_eta',
 'AK15PuppiSubJet_mass',
 'AK15PuppiSubJet_phi',
 'AK15PuppiSubJet_pt',
 'AK15PuppiSubJet_rawFactor',
 'AK15PuppiSubJet_nBHadrons',
 'AK15PuppiSubJet_nCHadrons',
 'CorrT1METJet_area',
 'CorrT1METJet_eta',
 'CorrT1METJet_muonSubtrFactor',
 'CorrT1METJet_phi',
 'CorrT1METJet_rawPt',
 'FatJet_ParticleNetMD_probQCDb',
 'FatJet_ParticleNetMD_probQCDbb',
 'FatJet_ParticleNetMD_probQCDc',
 'FatJet_ParticleNetMD_probQCDcc',
 'FatJet_ParticleNetMD_probQCDothers',
 'FatJet_ParticleNetMD_probXbb',
 'FatJet_ParticleNetMD_probXcc',
 'FatJet_ParticleNetMD_probXqq',
 'FatJet_area',
 'FatJet_btagCMVA',
 'FatJet_btagCSVV2',
 'FatJet_btagDDBvL',
 'FatJet_btagDDBvL_noMD',
 'FatJet_btagDDCvB',
 'FatJet_btagDDCvB_noMD',
 'FatJet_btagDDCvL',
 'FatJet_btagDDCvL_noMD',
 'FatJet_btagDeepB',
 'FatJet_btagHbb',
 'FatJet_deepTagMD_H4qvsQCD',
 'FatJet_deepTagMD_HbbvsQCD',
 'FatJet

In [18]:
# Run the cleaning on the post-processed file. The saved output below shows this
# raising KeyInFileError because 'Jet_btagDeepB_b' is not present in its Events tree.
cleandataset(filePostProc, defaults)

KeyInFileError: not found: 'Jet_btagDeepB_b'
in file /pnfs/desy.de/cms/tier2/store/user/andrey/VHccPostProcV15_NanoV7/2017/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/PostProc_V15_Mar2021_coli-Nano_115/210327_230455/0000/tree_1.root
in object /Events;1

## Outdated file! (Used previously when running VHcc-cTagSF for the first time)

In [None]:
# Run the cleaning on the older file referred to by the heading above
# (no saved output; the cell has no execution count).
cleandataset(uproot.open("/pnfs/desy.de/cms/tier2/store/user/spmondal/VHccPostV11_Nanov5_Jul19/2017/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIFall17NanoAODv5-PU2017REC16/190726_135011/0000/tree_1.root"), defaults)

## Stock Nano file used at HPC to train models
### Note: different variable names

In [2]:
# XRootD URL of the stock PFNano file inspected in the cells below.
pathStock = "root://grid-cms-xrootd.physik.rwth-aachen.de:1094//store/user/anovak/PFNano/106X_v2_17/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/RunIIFall17PFNanoAODv2-PU2017_12Apr2018_new_pmx_94X_mc2017_realistic_v14-v1PFNanoV2/210101_174326/0001/nano_mc2017_1-1156.root"

In [3]:
# Open the stock file; this handle is used by the branch-listing cells below.
fileStock = uproot.open(pathStock)

In [125]:
# Second handle on the same path as fileStock (both are opened from pathStock).
fileStock2 = uproot.open(pathStock)

In [4]:
# List the top-level objects stored in the stock file.
fileStock.keys()

['tag;1',
 'Events;1',
 'LuminosityBlocks;1',
 'Runs;1',
 'MetaData;1',
 'ParameterSets;1']

In [56]:
# Look up the 'genEventSumw' branch of the Runs tree (displays the branch object, not its values).
fileStock2['Runs']['genEventSumw']

<TBranch 'genEventSumw' at 0x7faa447157c0>

In [59]:
# Runs-tree branches of the stock file whose name contains "genEvent", shown sorted.
checkotherkeys9 = [
    branch
    for branch in fileStock2['Runs'].keys()
    if "genEvent" in branch
]
sorted(checkotherkeys9)

['genEventCount', 'genEventSumw', 'genEventSumw2']

In [70]:
# Events branches of the stock file whose name contains "Pileup", shown sorted.
# Note: this rebinds checkotherkeys9, which another cell fills from the Runs tree.
checkotherkeys9 = [
    branch
    for branch in fileStock2['Events'].keys()
    if "Pileup" in branch
]
sorted(checkotherkeys9)

['Pileup_gpudensity',
 'Pileup_nPU',
 'Pileup_nTrueInt',
 'Pileup_pudensity',
 'Pileup_sumEOOT',
 'Pileup_sumLOOT']

In [62]:
# Events branches of the stock file whose name contains "Jet_" (substring match);
# the list is stored without being displayed.
availableJetKeys = [
    branch
    for branch in fileStock2['Events'].keys()
    if "Jet_" in branch
]

In [41]:
# Events branches of the stock file whose name contains "Gen", shown sorted.
# NOTE(review): the saved output of this cell lists Flag_/HLT_/Muon_ branches, which
# does not match a "Gen" filter; the cell was presumably edited after its last run.
checkotherkeys2 = [
    branch
    for branch in fileStock['Events'].keys()
    if "Gen" in branch
]
sorted(checkotherkeys2)

['Flag_BadPFMuonFilter',
 'Flag_BadPFMuonSummer16Filter',
 'HLT_Dimuon0_Jpsi3p5_Muon2',
 'HLT_Dimuon0_Upsilon_Muon_L1_TM0',
 'HLT_Dimuon0_Upsilon_Muon_NoL1Mass',
 'HLT_Trimuon5_3p5_2_Upsilon_Muon',
 'Jet_nMuons',
 'Muon_charge',
 'Muon_cleanmask',
 'Muon_dxy',
 'Muon_dxyErr',
 'Muon_dxybs',
 'Muon_dz',
 'Muon_dzErr',
 'Muon_eta',
 'Muon_fsrPhotonIdx',
 'Muon_genPartFlav',
 'Muon_genPartIdx',
 'Muon_highPtId',
 'Muon_highPurity',
 'Muon_inTimeMuon',
 'Muon_ip3d',
 'Muon_isGlobal',
 'Muon_isPFcand',
 'Muon_isTracker',
 'Muon_jetIdx',
 'Muon_jetNDauCharged',
 'Muon_jetPtRelv2',
 'Muon_jetRelIso',
 'Muon_looseId',
 'Muon_mass',
 'Muon_mediumId',
 'Muon_mediumPromptId',
 'Muon_miniIsoId',
 'Muon_miniPFRelIso_all',
 'Muon_miniPFRelIso_chg',
 'Muon_multiIsoId',
 'Muon_mvaId',
 'Muon_mvaLowPt',
 'Muon_mvaLowPtId',
 'Muon_mvaTTH',
 'Muon_nStations',
 'Muon_nTrackerLayers',
 'Muon_pdgId',
 'Muon_pfIsoId',
 'Muon_pfRelIso03_all',
 'Muon_pfRelIso03_chg',
 'Muon_pfRelIso04_all',
 'Muon_phi',
 'Muon

In [42]:
# Events branches of the stock file whose name contains "Weight" (capital W), shown sorted.
checkotherkeys3 = [
    branch
    for branch in fileStock['Events'].keys()
    if "Weight" in branch
]
sorted(checkotherkeys3)

['L1PreFiringWeight_Dn',
 'L1PreFiringWeight_Nom',
 'L1PreFiringWeight_Up',
 'LHEPdfWeight',
 'LHEReweightingWeight',
 'LHEScaleWeight',
 'LHEWeight_originalXWGTUP',
 'PFCands_puppiWeight',
 'PFCands_puppiWeightNoLep',
 'PSWeight',
 'btagWeight_CSVV2',
 'btagWeight_DeepCSVB',
 'genWeight',
 'nLHEPdfWeight',
 'nLHEReweightingWeight',
 'nLHEScaleWeight',
 'nPSWeight']

In [46]:
# Events branches of the stock file whose name contains "weight" (lower-case w), shown sorted.
checkotherkeys4 = [
    branch
    for branch in fileStock['Events'].keys()
    if "weight" in branch
]
sorted(checkotherkeys4)

['Generator_weight', 'LHEReweightingWeight', 'nLHEReweightingWeight']

In [39]:
# Events branches of the stock file whose name contains "Gen", shown sorted.
checkotherkeys = [
    branch
    for branch in fileStock['Events'].keys()
    if "Gen" in branch
]
sorted(checkotherkeys)

['GenCands_charge',
 'GenCands_eta',
 'GenCands_mass',
 'GenCands_pdgId',
 'GenCands_phi',
 'GenCands_pt',
 'GenDressedLepton_eta',
 'GenDressedLepton_hasTauAnc',
 'GenDressedLepton_mass',
 'GenDressedLepton_pdgId',
 'GenDressedLepton_phi',
 'GenDressedLepton_pt',
 'GenFatJetCands_jetIdx',
 'GenFatJetCands_pFCandsIdx',
 'GenFatJetSVs_jetIdx',
 'GenFatJetSVs_sVIdx',
 'GenIsolatedPhoton_eta',
 'GenIsolatedPhoton_mass',
 'GenIsolatedPhoton_phi',
 'GenIsolatedPhoton_pt',
 'GenJetAK8_eta',
 'GenJetAK8_hadronFlavour',
 'GenJetAK8_mass',
 'GenJetAK8_partonFlavour',
 'GenJetAK8_phi',
 'GenJetAK8_pt',
 'GenJetCands_jetIdx',
 'GenJetCands_pFCandsIdx',
 'GenJetSVs_jetIdx',
 'GenJetSVs_sVIdx',
 'GenJet_eta',
 'GenJet_hadronFlavour',
 'GenJet_mass',
 'GenJet_partonFlavour',
 'GenJet_phi',
 'GenJet_pt',
 'GenMET_phi',
 'GenMET_pt',
 'GenPart_eta',
 'GenPart_genPartIdxMother',
 'GenPart_mass',
 'GenPart_pdgId',
 'GenPart_phi',
 'GenPart_pt',
 'GenPart_status',
 'GenPart_statusFlags',
 'GenVisTau_char

In [49]:
# Events branches of the stock file whose name contains "SoftAc", shown sorted.
checkmorekeys6 = [
    branch
    for branch in fileStock['Events'].keys()
    if "SoftAc" in branch
]
sorted(checkmorekeys6)

['SoftActivityJetHT',
 'SoftActivityJetHT10',
 'SoftActivityJetHT2',
 'SoftActivityJetHT5',
 'SoftActivityJetNjets10',
 'SoftActivityJetNjets2',
 'SoftActivityJetNjets5',
 'SoftActivityJet_eta',
 'SoftActivityJet_phi',
 'SoftActivityJet_pt',
 'nSoftActivityJet']

In [40]:
# Events branches of the stock file whose name contains "HLT", shown sorted.
checkmorekeys = [
    branch
    for branch in fileStock['Events'].keys()
    if "HLT" in branch
]
sorted(checkmorekeys)

['HLT_AK4CaloJet100',
 'HLT_AK4CaloJet120',
 'HLT_AK4CaloJet30',
 'HLT_AK4CaloJet40',
 'HLT_AK4CaloJet50',
 'HLT_AK4CaloJet80',
 'HLT_AK4PFJet100',
 'HLT_AK4PFJet120',
 'HLT_AK4PFJet30',
 'HLT_AK4PFJet50',
 'HLT_AK4PFJet80',
 'HLT_AK8PFHT750_TrimMass50',
 'HLT_AK8PFHT800_TrimMass50',
 'HLT_AK8PFHT850_TrimMass50',
 'HLT_AK8PFHT900_TrimMass50',
 'HLT_AK8PFJet140',
 'HLT_AK8PFJet200',
 'HLT_AK8PFJet260',
 'HLT_AK8PFJet320',
 'HLT_AK8PFJet330_PFAK8BTagCSV_p1',
 'HLT_AK8PFJet330_PFAK8BTagCSV_p17',
 'HLT_AK8PFJet360_TrimMass30',
 'HLT_AK8PFJet380_TrimMass30',
 'HLT_AK8PFJet40',
 'HLT_AK8PFJet400',
 'HLT_AK8PFJet400_TrimMass30',
 'HLT_AK8PFJet420_TrimMass30',
 'HLT_AK8PFJet450',
 'HLT_AK8PFJet500',
 'HLT_AK8PFJet550',
 'HLT_AK8PFJet60',
 'HLT_AK8PFJet80',
 'HLT_AK8PFJetFwd140',
 'HLT_AK8PFJetFwd200',
 'HLT_AK8PFJetFwd260',
 'HLT_AK8PFJetFwd320',
 'HLT_AK8PFJetFwd40',
 'HLT_AK8PFJetFwd400',
 'HLT_AK8PFJetFwd450',
 'HLT_AK8PFJetFwd500',
 'HLT_AK8PFJetFwd60',
 'HLT_AK8PFJetFwd80',
 'HLT_BTagMu_A

In [37]:
# Events branches of the stock file whose name contains "Jet", shown sorted.
checkkeys = [
    branch
    for branch in fileStock['Events'].keys()
    if "Jet" in branch
]
sorted(checkkeys)

['CorrT1METJet_area',
 'CorrT1METJet_eta',
 'CorrT1METJet_muonSubtrFactor',
 'CorrT1METJet_phi',
 'CorrT1METJet_rawPt',
 'FatJetPFCands_btagEtaRel',
 'FatJetPFCands_btagJetDistVal',
 'FatJetPFCands_btagPParRatio',
 'FatJetPFCands_btagPtRatio',
 'FatJetPFCands_btagSip3dSig',
 'FatJetPFCands_btagSip3dVal',
 'FatJetPFCands_jetIdx',
 'FatJetPFCands_pFCandsIdx',
 'FatJetPFCands_pt',
 'FatJetSVs_chi2',
 'FatJetSVs_costhetasvpv',
 'FatJetSVs_d3d',
 'FatJetSVs_d3dsig',
 'FatJetSVs_deltaR',
 'FatJetSVs_dxy',
 'FatJetSVs_dxysig',
 'FatJetSVs_enration',
 'FatJetSVs_jetIdx',
 'FatJetSVs_mass',
 'FatJetSVs_normchi2',
 'FatJetSVs_ntracks',
 'FatJetSVs_phirel',
 'FatJetSVs_pt',
 'FatJetSVs_ptrel',
 'FatJetSVs_sVIdx',
 'FatJet_DDX_jetNSecondaryVertices',
 'FatJet_DDX_jetNTracks',
 'FatJet_DDX_tau1_flightDistance2dSig',
 'FatJet_DDX_tau1_trackEtaRel_0',
 'FatJet_DDX_tau1_trackEtaRel_1',
 'FatJet_DDX_tau1_trackEtaRel_2',
 'FatJet_DDX_tau1_trackSip3dSig_0',
 'FatJet_DDX_tau1_trackSip3dSig_1',
 'FatJet_DD

In [15]:
# Events branches of the stock file whose name contains "MET" (displayed, not stored).
[
    branch
    for branch in fileStock['Events'].keys()
    if "MET" in branch
]

['CaloMET_phi',
 'CaloMET_pt',
 'CaloMET_sumEt',
 'ChsMET_phi',
 'ChsMET_pt',
 'ChsMET_sumEt',
 'nCorrT1METJet',
 'CorrT1METJet_area',
 'CorrT1METJet_eta',
 'CorrT1METJet_muonSubtrFactor',
 'CorrT1METJet_phi',
 'CorrT1METJet_rawPt',
 'DeepMETResolutionTune_phi',
 'DeepMETResolutionTune_pt',
 'DeepMETResponseTune_phi',
 'DeepMETResponseTune_pt',
 'METFixEE2017_MetUnclustEnUpDeltaX',
 'METFixEE2017_MetUnclustEnUpDeltaY',
 'METFixEE2017_covXX',
 'METFixEE2017_covXY',
 'METFixEE2017_covYY',
 'METFixEE2017_phi',
 'METFixEE2017_pt',
 'METFixEE2017_significance',
 'METFixEE2017_sumEt',
 'METFixEE2017_sumPtUnclustered',
 'GenMET_phi',
 'GenMET_pt',
 'MET_MetUnclustEnUpDeltaX',
 'MET_MetUnclustEnUpDeltaY',
 'MET_covXX',
 'MET_covXY',
 'MET_covYY',
 'MET_phi',
 'MET_pt',
 'MET_significance',
 'MET_sumEt',
 'MET_sumPtUnclustered',
 'PuppiMET_phi',
 'PuppiMET_phiJERDown',
 'PuppiMET_phiJERUp',
 'PuppiMET_phiJESDown',
 'PuppiMET_phiJESUp',
 'PuppiMET_phiUnclusteredDown',
 'PuppiMET_phiUnclusteredUp

In [7]:
# Display the current value of availableJetKeys.
# NOTE(review): this name is assigned in several cells (for both files), so what is
# shown depends on which of them ran last.
availableJetKeys

['CorrT1METJet_area',
 'CorrT1METJet_eta',
 'CorrT1METJet_muonSubtrFactor',
 'CorrT1METJet_phi',
 'CorrT1METJet_rawPt',
 'FatJet_area',
 'FatJet_btagCMVA',
 'FatJet_btagCSVV2',
 'FatJet_btagDDBvL',
 'FatJet_btagDDBvLV2',
 'FatJet_btagDDBvL_noMD',
 'FatJet_btagDDCvB',
 'FatJet_btagDDCvBV2',
 'FatJet_btagDDCvB_noMD',
 'FatJet_btagDDCvL',
 'FatJet_btagDDCvLV2',
 'FatJet_btagDDCvL_noMD',
 'FatJet_btagDeepB',
 'FatJet_btagHbb',
 'FatJet_deepTagMD_H4qvsQCD',
 'FatJet_deepTagMD_HbbvsQCD',
 'FatJet_deepTagMD_TvsQCD',
 'FatJet_deepTagMD_WvsQCD',
 'FatJet_deepTagMD_ZHbbvsQCD',
 'FatJet_deepTagMD_ZHccvsQCD',
 'FatJet_deepTagMD_ZbbvsQCD',
 'FatJet_deepTagMD_ZvsQCD',
 'FatJet_deepTagMD_bbvsLight',
 'FatJet_deepTagMD_ccvsLight',
 'FatJet_deepTag_H',
 'FatJet_deepTag_QCD',
 'FatJet_deepTag_QCDothers',
 'FatJet_deepTag_TvsQCD',
 'FatJet_deepTag_WvsQCD',
 'FatJet_deepTag_ZvsQCD',
 'FatJet_eta',
 'FatJet_mass',
 'FatJet_msoftdrop',
 'FatJet_n2b1',
 'FatJet_n3b1',
 'FatJet_particleNetMD_QCD',
 'FatJet_pa

In [5]:
# List every branch name of the stock file's Events tree.
fileStock['Events'].keys()

['run',
 'luminosityBlock',
 'event',
 'HTXS_Higgs_pt',
 'HTXS_Higgs_y',
 'HTXS_stage1_1_cat_pTjet25GeV',
 'HTXS_stage1_1_cat_pTjet30GeV',
 'HTXS_stage1_1_fine_cat_pTjet25GeV',
 'HTXS_stage1_1_fine_cat_pTjet30GeV',
 'HTXS_stage1_2_cat_pTjet25GeV',
 'HTXS_stage1_2_cat_pTjet30GeV',
 'HTXS_stage1_2_fine_cat_pTjet25GeV',
 'HTXS_stage1_2_fine_cat_pTjet30GeV',
 'HTXS_stage_0',
 'HTXS_stage_1_pTjet25',
 'HTXS_stage_1_pTjet30',
 'HTXS_njets25',
 'HTXS_njets30',
 'btagWeight_CSVV2',
 'btagWeight_DeepCSVB',
 'CaloMET_phi',
 'CaloMET_pt',
 'CaloMET_sumEt',
 'ChsMET_phi',
 'ChsMET_pt',
 'ChsMET_sumEt',
 'nCorrT1METJet',
 'CorrT1METJet_area',
 'CorrT1METJet_eta',
 'CorrT1METJet_muonSubtrFactor',
 'CorrT1METJet_phi',
 'CorrT1METJet_rawPt',
 'nJetPFCands',
 'JetPFCands_pt',
 'JetPFCands_btagEtaRel',
 'JetPFCands_btagPtRatio',
 'JetPFCands_btagPParRatio',
 'JetPFCands_btagSip3dVal',
 'JetPFCands_btagSip3dSig',
 'JetPFCands_btagJetDistVal',
 'JetPFCands_pFCandsIdx',
 'JetPFCands_jetIdx',
 'nJetSVs',
 '

In [122]:
# Scratch check: the boolean False compares as <= 0, so the branch is taken and 'yep' is printed.
if False <= 0:
    print('yep')

yep


In [126]:
# Read five muon branches of the stock file with library='pd'. step_size equals the
# number of entries and the loop is left after its first iteration, so `data` keeps
# the first chunk produced by iterate().
for data in fileStock2['Events'].iterate(["Muon_mass","Muon_pt","Muon_eta","Muon_tightId","Muon_pfRelIso04_all"], step_size=fileStock2['Events'].num_entries, library='pd'):
    break

In [127]:
# Preview the leading rows of the muon table (slice [:19]).
data[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Muon_mass,Muon_pt,Muon_eta,Muon_tightId,Muon_pfRelIso04_all
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,0.105713,6.894824,1.462891,True,1.174873
1,0,0.105713,5.258268,1.118652,True,5.6187
3,0,0.105713,4.776484,-0.321838,True,17.226768
8,0,0.105713,18.625374,1.285889,True,0.055665
12,0,0.105713,5.772525,2.180176,False,1.281596
12,1,0.105713,3.461341,-1.438965,False,13.395839
14,0,0.105713,7.847552,-0.616699,True,3.325467
15,0,0.105713,40.94487,-1.272705,True,0.063264
16,0,0.105713,4.679722,0.69458,False,11.868073
16,1,0.105713,4.336269,0.583496,False,6.067607


In [120]:
# Read four electron branches of the stock file with library='pd'; as above, only the
# first chunk (requested with step_size = number of entries) is kept in `dataE`.
for dataE in fileStock2['Events'].iterate(["Electron_mass","Electron_eta","Electron_pt","Electron_mvaFall17V2Iso_WP80"], step_size=fileStock2['Events'].num_entries, library='pd'):
    break

In [121]:
# Preview the leading rows of the electron table (slice [:19]).
dataE[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Electron_mass,Electron_eta,Electron_pt,Electron_mvaFall17V2Iso_WP80
entry,subentry,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,-0.034546,0.795288,71.86467,False
0,1,0.000376,-0.457886,8.973927,False
1,0,0.007744,0.393982,26.001202,False
1,1,0.005825,0.288574,26.221628,True
3,0,-0.000158,-0.844849,42.065136,False
3,1,-0.002628,0.518433,25.802813,True
4,0,-0.014015,-1.248047,34.197655,False
6,0,0.011269,-2.043945,25.214291,False
6,1,-0.004044,0.995361,14.241823,False
7,0,-0.017731,-1.057373,41.963039,False


In [108]:
# Read Jet_pt of the stock file with library='pd'; only the first chunk
# (requested with step_size = number of entries) is kept in `dataJet`.
for dataJet in fileStock2['Events'].iterate("Jet_pt", step_size=fileStock2['Events'].num_entries, library='pd'):
    break

In [110]:
# Display the stock-file Jet_pt table (indexed by entry / subentry in the saved output).
dataJet

Unnamed: 0_level_0,Unnamed: 1_level_0,Jet_pt
entry,subentry,Unnamed: 2_level_1
0,0,82.187500
0,1,70.125000
0,2,49.375000
0,3,33.187500
0,4,32.625000
...,...,...
11691,3,39.562500
11691,4,37.281250
11691,5,34.312500
11691,6,22.453125


In [109]:
# Preview the leading rows of the stock-file Jet_pt table (slice [:19]).
dataJet[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Jet_pt
entry,subentry,Unnamed: 2_level_1
0,0,82.1875
0,1,70.125
0,2,49.375
0,3,33.1875
0,4,32.625
0,5,31.953125
0,6,30.046875
0,7,15.554688
1,0,148.0
1,1,114.5625


In [95]:
# Read Muon_mass of the post-processed file with library='pd'; only the first chunk
# (requested with step_size = number of entries) is kept in `dataN`.
for dataN in filePostProc['Events'].iterate("Muon_mass", step_size=filePostProc['Events'].num_entries, library='pd'):
    break

In [101]:
# Preview the leading rows of the post-processed Muon_mass table (slice [:19]).
dataN[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Muon_mass
entry,subentry,Unnamed: 2_level_1
0,0,0.105713
0,1,0.105713
1,0,0.105713
1,1,0.105713
1,2,0.105713
3,0,0.105713
4,0,0.105713
4,1,0.105713
5,0,0.105713
6,0,0.105713


In [97]:
# Read Electron_mass of the post-processed file with library='pd'; only the first chunk
# (requested with step_size = number of entries) is kept in `dataNE`.
for dataNE in filePostProc['Events'].iterate("Electron_mass", step_size=filePostProc['Events'].num_entries, library='pd'):
    break

In [103]:
# Preview the leading rows of the post-processed Electron_mass table (slice [:19]).
dataNE[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Electron_mass
entry,subentry,Unnamed: 2_level_1
0,0,0.006355
0,1,-0.009399
0,2,-0.003695
2,0,-0.012894
3,0,0.016617
3,1,0.005978
5,0,0.0056
5,1,0.003702
7,0,0.004189
7,1,0.004261


In [99]:
# Read Jet_pt of the post-processed file with library='pd'; only the first chunk
# (requested with step_size = number of entries) is kept in `dataJetN`.
for dataJetN in filePostProc['Events'].iterate("Jet_pt", step_size=filePostProc['Events'].num_entries, library='pd'):
    break

In [100]:
# Preview the leading rows of the post-processed Jet_pt table (slice [:19]).
dataJetN[:19]

Unnamed: 0_level_0,Unnamed: 1_level_0,Jet_pt
entry,subentry,Unnamed: 2_level_1
0,0,101.25
0,1,78.0
0,2,76.625
0,3,56.8125
0,4,24.734375
0,5,23.21875
0,6,22.96875
0,7,21.875
0,8,19.84375
1,0,197.25


In [111]:
# Display the post-processed Jet_pt table (indexed by entry / subentry in the saved output).
dataJetN

Unnamed: 0_level_0,Unnamed: 1_level_0,Jet_pt
entry,subentry,Unnamed: 2_level_1
0,0,101.250000
0,1,78.000000
0,2,76.625000
0,3,56.812500
0,4,24.734375
...,...,...
129341,2,59.531250
129341,3,41.062500
129341,4,32.343750
129341,5,17.562500


In [None]:
# Run the cleaning on the stock PFNano file. The URL is the same string that is
# assigned to pathStock, but the file is opened again here instead of reusing a handle.
cleandataset(uproot.open("root://grid-cms-xrootd.physik.rwth-aachen.de:1094//store/user/anovak/PFNano/106X_v2_17/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/RunIIFall17PFNanoAODv2-PU2017_12Apr2018_new_pmx_94X_mc2017_realistic_v14-v1PFNanoV2/210101_174326/0001/nano_mc2017_1-1156.root"), defaults)