Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 141 additions & 9 deletions bin/qcd_from_data
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,159 @@ data_file:
mc_file:
path to MC file, used for the normalisation of the data histograms

output_file:
path to the output file
subtract_other_samples:
dictionary of (sample, file) to be removed from the template control region (data_file)

histogram_path:
source of histograms. All sub-paths will be considered (recursive)

ignore_subpaths:
sub-paths of histogram_path to be ignored

normalisation_keyword_in_path:
part of the path that determines the normalisation (usually signal) region

shape_keyword_in_path:
part of the path that determines the template (usually control/sideband) region

shape_btag:
b-tag multiplicity to be used for the template region

shape_btag_for_exceptions:
b-tag multiplicity to be used for the template region for exceptions (see shape_btag_exceptions)

data_histograms:
paths to data histograms (from data file)
shape_btag_exceptions:
list of histogram names that have a different b-tag multiplicity for the template region

mc_histograms:
paths to MC histograms (from MC file)
remove_for_shape:
part of the histogram name to be removed from the template histogram name.
Useful for re-weighted histograms (mc-only).

output_file:
path to the output file

output_histograms:
paths of the output histograms
Uses the data_file to extract the templates, removes other samples
(subtract_other_samples) and normalises it according to mc_file.

'''
from ROOT import gROOT
gcd = gROOT.cd
from optparse import OptionParser
from tools.file_utilities import write_data_to_JSON, read_data_from_JSON
from tools.ROOT_utililities import root_mkdir, find_btag, get_histogram_dictionary
from tools.hist_utilities import clean_control_region
from rootpy.io import root_open

def main():
    '''
    Entry point of the script: prints a banner followed by the module
    documentation, reads the command line and runs create_qcd_file once
    per set of input values.
    '''
    banner = (
        "Welcome to the QCD-from-data merging script",
        'Please take a seat while the code is being developed.',
        'Once finished you will be able to create a single file using shapes from data and normalisation from MC',
        'In the meantime have a look at the script usage',
        '',
        __doc__,
    )
    for line in banner:
        print(line)

    options, input_values_sets, json_input_files = parse_options()
    if options.test:
        # In test mode the JSON files given on the command line are replaced
        # by a single set of built-in values.
        # NOTE(review): setup_test_values is not defined in the part of the
        # file visible here - confirm it exists before relying on --test.
        input_values_sets, json_input_files = [setup_test_values()], ['test.json']

    for input_values, json_file in zip(input_values_sets, json_input_files):
        print('Processing %s' % json_file)
        create_qcd_file(input_values)

def parse_options():
    '''
    Parse the command line.

    Every positional argument is treated as the path of a JSON file and is
    read with read_data_from_JSON, unless --test is given, in which case the
    positional arguments are ignored.

    @return: tuple (options, input_values_sets, json_input_files) where the
             two lists are index-aligned (one entry per JSON file) and both
             are empty in test mode
    '''
    parser = OptionParser( __doc__ )
    parser.add_option( "-t", "--test", dest = "test", action = "store_true",
                       help = "Run with test values and write them to test.json" )
    options, args = parser.parse_args()

    json_input_files = []
    input_values_sets = []
    if not options.test:
        json_input_files = list( args )
        input_values_sets = [read_data_from_JSON( json_file )
                             for json_file in json_input_files]

    return options, input_values_sets, json_input_files

def _shape_histogram_name(histogram, remove_for_shape, shape_btag,
                          shape_btag_for_exceptions, shape_btag_exceptions):
    '''
    Translate the name of a normalisation histogram into the name of the
    template (shape) histogram.

    @param histogram: name of the histogram in the normalisation region
    @param remove_for_shape: sub-strings to be stripped from the name
    @param shape_btag: b-tag identifier to use for the template
    @param shape_btag_for_exceptions: b-tag identifier to use when the name
           contains one of shape_btag_exceptions
    @param shape_btag_exceptions: sub-strings marking the exceptions
    @return: the histogram name to look up in the template region
    '''
    # both the b-tag and the exception lookup use the unmodified name
    current_btag, _ = find_btag(histogram)
    is_exception = any(var in histogram for var in shape_btag_exceptions)

    shape_histogram = histogram
    for r in remove_for_shape:
        shape_histogram = shape_histogram.replace(r, '')

    new_btag = shape_btag_for_exceptions if is_exception else shape_btag
    return shape_histogram.replace(current_btag, new_btag)


def _template_scale_factor(normalisation, n_entries_shape):
    '''
    Factor that scales a template with integral n_entries_shape to the
    integral 'normalisation'.

    Returns 1 if the template integral is not positive. If the normalisation
    is exactly 0 the template is scaled to unit area instead of being
    emptied (bug fix for empty templates).
    '''
    if not n_entries_shape > 0:
        return 1
    if normalisation == 0:
        # 1.0 keeps this a true division even for integer input on Python 2
        return 1.0 / n_entries_shape
    return normalisation / n_entries_shape


def _write_histograms(output, output_file):
    '''
    Write all histograms to a newly created ROOT file.

    @param output: dictionary of {'path/in/file/histogram_name': histogram}
    @param output_file: path of the ROOT file, opened with 'recreate'
    '''
    output_file_handle = root_open(output_file, 'recreate')
    try:
        # probably faster to use TFileCache within the loop of the caller.
        for path_with_hist, histogram in output.items():
            # Split off the last component only: str.replace would also strip
            # a directory that happens to have the same name as the histogram.
            path, histogram_name = path_with_hist.rsplit('/', 1)
            root_mkdir(output_file_handle, path)
            output_file_handle.cd(path)
            histogram.write(histogram_name)
        output_file_handle.cd()
    finally:
        # close the file even if writing one of the histograms fails
        output_file_handle.close()


def create_qcd_file(input_values):
    '''
    Create a ROOT file with histograms that take their shape from a data
    control region and their normalisation from MC.

    For every histogram found in mc_file below histogram_path (sub-paths
    listed in ignore_subpaths are skipped) the matching template is read
    from data_file, the samples in subtract_other_samples are removed via
    clean_control_region and the result is scaled to the integral (including
    overflow) of the MC histogram. The result is stored in output_file under
    the path and name of the MC histogram.

    @param input_values: dictionary with the keys described in the module
           documentation (data_file, mc_file, histogram_path,
           shape_keyword_in_path, shape_btag, shape_btag_for_exceptions,
           shape_btag_exceptions, remove_for_shape,
           normalisation_keyword_in_path, ignore_subpaths,
           subtract_other_samples, output_file)
    @raise KeyError: if one of the keys is missing
    '''
    data_file = input_values['data_file']
    mc_file = input_values['mc_file']
    histogram_path = input_values['histogram_path']
    shape_keyword_in_path = input_values['shape_keyword_in_path']
    shape_btag = input_values['shape_btag']
    shape_btag_for_exceptions = input_values['shape_btag_for_exceptions']
    shape_btag_exceptions = input_values['shape_btag_exceptions']
    remove_for_shape = input_values['remove_for_shape']
    normalisation_keyword_in_path = input_values['normalisation_keyword_in_path']
    ignore_subpaths = input_values['ignore_subpaths']
    subtract_other_samples = input_values['subtract_other_samples']
    output_file = input_values['output_file']

    total_histograms = 0
    output = {}
    data_file_handle = root_open(data_file)
    try:
        get_shape_hist = data_file_handle.Get
        with root_open(mc_file) as f:
            for path, _, histograms in f.walk():
                if histogram_path not in path or not histograms:
                    continue
                if any(subpath in path for subpath in ignore_subpaths):
                    continue

                shape_path = path.replace(normalisation_keyword_in_path,
                                          shape_keyword_in_path)
                for histogram in histograms:
                    hist = f.Get(path + '/' + histogram)
                    normalisation = hist.integral(overflow = True)
                    # now swap the b-tag
                    shape_histogram = _shape_histogram_name(
                        histogram, remove_for_shape, shape_btag,
                        shape_btag_for_exceptions, shape_btag_exceptions)
                    full_shape_path = shape_path + '/' + shape_histogram

                    # change to gROOT before cloning
                    # NOTE(review): presumably so that the clone is not owned
                    # by one of the input files - confirm
                    gcd()
                    output_hist = get_shape_hist(full_shape_path).clone()
                    other_samples = get_histogram_dictionary(
                        full_shape_path, subtract_other_samples)
                    # snapshot of the sample names before 'data' is added, so
                    # that data is never part of the list to subtract
                    subtract_samples = list(other_samples.keys())
                    other_samples['data'] = output_hist
                    output_hist = clean_control_region(
                        other_samples, subtract = subtract_samples)

                    # scale the histogram
                    n_entries_shape = output_hist.integral(overflow = True)
                    output_hist.Scale(
                        _template_scale_factor(normalisation, n_entries_shape))
                    output[path + '/' + histogram] = output_hist
                total_histograms += len(histograms)
    finally:
        # close the data file even if one of the histograms could not be read
        data_file_handle.close()

    _write_histograms(output, output_file)
    print('Processed %d histograms' % total_histograms)

if __name__ == '__main__':
main()
Expand Down
23 changes: 23 additions & 0 deletions config/merging/qcd_sample_shape_from_data_electron.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"data_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleElectron_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"mc_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/QCD_Electron_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"subtract_other_samples": {
"VJets": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/VJets_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"TTJet": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/TTJet_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"SingleTop": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleTop_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root"
},
"histogram_path": "TTbar_plus_X_analysis/EPlusJets/Ref selection/",
"normalisation_keyword_in_path": "Ref selection",
"shape_keyword_in_path": "QCDConversions",
"shape_btag": "0btag",
"shape_btag_for_exceptions": "1btag",
"shape_btag_exceptions": ["angle_bl", "M_bl"],
"remove_for_shape": ["_reweighted"],
"ignore_subpaths": [
"GenMET",
"JetRes",
"Vertices",
"Ref selection/Jets"
],
"output_file": "QCD_Electron_from_conversions.root"
}
23 changes: 23 additions & 0 deletions config/merging/qcd_sample_shape_from_data_muon.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"data_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleMu_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"mc_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/QCD_Muon_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"subtract_other_samples": {
"VJets": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/VJets_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"TTJet": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/TTJet_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
"SingleTop": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleTop_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root"
},
"histogram_path": "TTbar_plus_X_analysis/MuPlusJets/Ref selection/",
"normalisation_keyword_in_path": "Ref selection",
"shape_keyword_in_path": "QCD non iso mu+jets ge3j",
"shape_btag": "0btag",
"shape_btag_for_exceptions": "1btag",
"shape_btag_exceptions": ["angle_bl", "M_bl"],
"remove_for_shape": ["_reweighted"],
"ignore_subpaths": [
"GenMET",
"JetRes",
"Vertices",
"Ref selection/Jets"
],
"output_file": "QCD_Muon_from_noniso_ge3j.root"
}
78 changes: 52 additions & 26 deletions tools/ROOT_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,16 @@
from ROOT import gROOT
gcd = gROOT.cd
from config.summations_common import b_tag_bins_inclusive, b_tag_summations
from config.summations_common import b_tag_bins_exclusive

def get_histogram_from_file( histogram_path, input_file ):
current_btag = b_tag_bins_inclusive[0]
found_btag = False

for b_tag in b_tag_bins_inclusive:
if b_tag in histogram_path:
current_btag = b_tag
found_btag = True
break
current_btag, found_btag = find_btag(histogram_path)

root_file = File( input_file )
get_histogram = root_file.Get


if not found_btag:
if not found_btag or not current_btag in b_tag_summations.keys():
root_histogram = get_histogram( histogram_path )
if not is_valid_histogram( root_histogram, histogram_path, input_file ):
return
Expand Down Expand Up @@ -52,7 +46,6 @@ def is_valid_histogram( histogram, histogram_name, file_name ):
return False
return True


# Reads a single histogram from each given rootFile
# and returns a dictionary with the same naming as 'files'
def get_histogram_dictionary( histogram_path, files = {} ):
Expand All @@ -71,17 +64,10 @@ def get_histograms_from_files( histogram_paths = [], files = {}, verbose = False
histograms[sample] = {}

for histogram_path in histogram_paths:
current_btag = b_tag_bins_inclusive[0]
found_btag = False

for b_tag in b_tag_bins_inclusive:
if b_tag in histogram_path:
current_btag = b_tag
found_btag = True
break
current_btag, found_btag = find_btag(histogram_path)

root_histogram = None
if not found_btag:
if not found_btag or not current_btag in b_tag_summations.keys():
root_histogram = get_histogram( histogram_path )
if not is_valid_histogram( root_histogram, histogram_path, input_file ):
return
Expand All @@ -107,13 +93,6 @@ def get_histograms_from_files( histogram_paths = [], files = {}, verbose = False
root_file.Close()
return histograms

def root_file_mkdir( root_file, directory ):
    '''
    Return the object stored under 'directory' in root_file, creating the
    directory first if the lookup gives a false value.
    @param root_file: file handle providing Get and mkdir
    @param directory: path of the directory inside the file
    '''
    existing = root_file.Get( directory )
    if existing:
        return existing
    # if directory = a/b/c mkdir will only return a, but make the complete
    # path - hence the second lookup instead of using the return value
    root_file.mkdir( directory )
    return root_file.Get( directory )

def get_histogram_info_tuple( histogram_in_path ):
histogram_name = histogram_in_path.split( '/' )[-1]
directory = ''.join( histogram_in_path.rsplit( histogram_name, 1 )[:-1] )
Expand All @@ -126,3 +105,50 @@ def set_root_defaults( set_batch = True, msg_ignore_level = 1001 ):
gROOT.SetBatch( set_batch )
# ignore warnings
gROOT.ProcessLine( 'gErrorIgnoreLevel = %d;' % msg_ignore_level )

def root_mkdir(file_handle, path):
    '''
    Equivalent to mkdir -p but for ROOT files.
    Creates, level by level, every directory of the given path that
    root_exists does not report as already present.
    @param file_handle: file handle to an open ROOT file with write access
    @param path: the path to be written to the ROOT file ('/' separated)
    '''
    file_handle.cd()

    partial_path = ''
    # str.split returns [path] when there is no '/', so a single directory
    # name needs no special treatment
    for component in path.split('/'):
        if partial_path == '':
            partial_path = component
        else:
            partial_path = partial_path + '/' + component
        if not root_exists(file_handle, partial_path):
            file_handle.mkdir(partial_path)

def root_exists(file_handle, path):
    '''
    Check whether a directory exists in a ROOT file.
    @param file_handle: file handle providing GetDirectory
    @param path: path of the directory inside the file
    @return: True if GetDirectory( path ) returns something other than None,
             False if it returns None or raises an exception
    '''
    try:
        pointer_to_directory = file_handle.GetDirectory( path )
    except Exception:
        # A failed lookup means "does not exist". Only Exception is caught so
        # that KeyboardInterrupt and SystemExit are no longer swallowed.
        return False
    # NOTE(review): a handle that signals a missing directory with a null
    # pointer object instead of None/an exception would be reported as
    # existing - confirm against the file handle type in use.
    return pointer_to_directory is not None

def find_btag( histogram_path ):
    '''
    Look for a b-tag multiplicity identifier in the histogram path.
    The inclusive identifiers (b_tag_bins_inclusive) are tried first, then
    the exclusive ones (b_tag_bins_exclusive); within each collection the
    first identifier contained in the path wins.
    Returns (found b-tag, True) or (b_tag_bins_inclusive[0], False)
    '''
    for known_bins in ( b_tag_bins_inclusive, b_tag_bins_exclusive ):
        matches = [b_tag for b_tag in known_bins if b_tag in histogram_path]
        if matches:
            return matches[0], True
    return b_tag_bins_inclusive[0], False