Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a tool to do a bin by bin DQM root file comparison #26283

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
380 changes: 380 additions & 0 deletions DQMServices/FileIO/scripts/blacklist.py

Large diffs are not rendered by default.

145 changes: 145 additions & 0 deletions DQMServices/FileIO/scripts/compareDQMOutput.py
@@ -0,0 +1,145 @@
#!/bin/env python

import os
import sys
import glob
import argparse
import subprocess
from threading import Thread

COMPARISON_RESULTS = []

def collect_and_compare_files(base_dir, pr_dir, output_dir, num_procs, pr_number, test_number, release_format):
files = get_file_pairs(base_dir, pr_dir)

threads = []
for _ in range(num_procs):
thread = Thread(target=compare, args=(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format))
thread.start()
threads.append(thread)

[thread.join() for thread in threads]

COMPARISON_RESULTS.sort(key=lambda k: float(k['workflow']))

def compare(base_dir, pr_dir, output_dir, files, pr_number, test_number, release_format):
while files:
try:
file_name = files.pop()
command = ['python', os.path.join(os.path.dirname(__file__), 'compareHistograms.py'), '-b', os.path.join(base_dir, file_name), \
'-p', os.path.join(pr_dir, file_name), '-o', output_dir, '-n', pr_number, '-t', test_number, '-r', release_format]
print('Running comparison:')
print(' '.join(command))

output = subprocess.check_output(command)

output_elements = output.split('\n')[1:]
base_output_filename = output_elements[0]
pr_output_filename = output_elements[1]
run_nr = base_output_filename.split('_')[2].lstrip('R').lstrip('0')
output_numbers = output_elements[2].split(' ')

workflow = os.path.basename(os.path.dirname(os.path.join(base_dir, file_name))).split('_')[0]
base_dataset = '/' + '/'.join(base_output_filename.rstrip('.root').split('__')[1:])
pr_dataset = '/' + '/'.join(pr_output_filename.rstrip('.root').split('__')[1:])

COMPARISON_RESULTS.append({'workflow': workflow, 'base_dataset': base_dataset, 'pr_dataset': pr_dataset, 'run_nr': run_nr,\
'changed_elements': int(output_numbers[0]), 'removed_elements': int(output_numbers[1]), 'added_elements': int(output_numbers[2])})
except:
pass

def get_file_pairs(base_dir, pr_dir):
base_files = glob.glob(os.path.join(base_dir, '*.*_*/DQM_*.root'))
pr_files = glob.glob(os.path.join(pr_dir, '*.*_*/DQM_*.root'))

# Remove base directories and leave
# only parts of paths that are same
base_files = map(lambda x: os.path.relpath(x, base_dir), base_files)
pr_files = map(lambda x: os.path.relpath(x, pr_dir), pr_files)

# Find intersection
return [value for value in base_files if value in pr_files]

def upload_to_gui(output_dir, num_procs):
base_files = glob.glob(os.path.join(output_dir, 'base/*.root'))
pr_files = glob.glob(os.path.join(output_dir, 'pr/*.root'))

files = base_files + pr_files

for _ in range(num_procs):
thread = Thread(target=upload, args=(files,))
thread.start()

def upload(files):
while files:
try:
file = files.pop()
command = ['python', os.path.join(os.path.dirname(__file__), 'visDQMUpload.py'), 'https://cmsweb.cern.ch/dqm/dev', file]
print('Uploading output:')
print(' '.join(command))

subprocess.call(command)
print('')
except:
pass

def generate_summary_html(output_dir, pr_number, summary_dir):
template_file = open(os.path.join(os.path.dirname(__file__), 'dqm-histo-comparison-summary-template.html'), 'r')
result = template_file.read()

result = result.replace('$PR_NUMBER$', pr_number)

table_items = ''
total_changes = 0

for comp in COMPARISON_RESULTS:
total_changes += comp['removed_elements'] + comp['added_elements'] + comp['changed_elements']
baseline_count = comp['changed_elements'] + comp['removed_elements']
pr_count = comp['changed_elements'] + comp['added_elements']
overlay_count = baseline_count

# Make urls
base_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (comp['run_nr'], comp['base_dataset'])
pr_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;sampletype%%3Doffline_relval;workspace%%3DEverything;' % (comp['run_nr'], comp['pr_dataset'])
overlay_url = 'https://cmsweb.cern.ch/dqm/dev/start?runnr=%s;dataset%%3D%s;referenceshow%%3Dall;referenceobj1%%3Dother::%s::;sampletype%%3Doffline_relval;workspace%%3DEverything;' \
% (comp['run_nr'], comp['pr_dataset'], comp['base_dataset'])

table_items += ' <tr>\n'
table_items += ' <td><a href="%s" target="_blank">%s baseline GUI</a><span> (%s)</span></td>\n' % (base_url, comp['workflow'], baseline_count)
table_items += ' <td><a href="%s" target="_blank">%s pr GUI</a><span> (%s)</span></td>\n' % (pr_url, comp['workflow'], pr_count)
table_items += ' <td><a href="%s" target="_blank">%s overlay GUI</a><span> (%s)</span></td>\n' % (overlay_url, comp['workflow'], overlay_count)
table_items += ' <td><span class="removed">-%s</span><span class="added">+%s</span><span class="changed">%s</span></td>\n' \
% (comp['removed_elements'], comp['added_elements'], comp['changed_elements'])
table_items += ' </tr>\n'

result = result.replace('$TOTAL_CHANGES$', str(total_changes))
result = result.replace('$NUMBER_OF_WORKFLOWS$', str(len(COMPARISON_RESULTS)))
result = result.replace('$PER_WORKFLOW_LIST$', table_items)
template_file.close()

# Write output
result_file_path = os.path.join(summary_dir, 'dqm-histo-comparison-summary.html')
if os.path.dirname(result_file_path):
if not os.path.exists(os.path.dirname(result_file_path)):
os.makedirs(os.path.dirname(result_file_path))
summary_file = open(result_file_path, 'w')
summary_file.write(result)
summary_file.close()

if __name__ == '__main__':
parser = argparse.ArgumentParser(description="This tool compares DQM monitor elements within DQM files found in base-dir with the ones found in in pr-dir. "
"All workflow directories are searched for correctly named DQM root files. "
"Comparison is done bin by bin and output is written to a root files containing only the changes.")
parser.add_argument('-b', '--base-dir', help='Baseline IB directory', default='basedata/')
parser.add_argument('-p', '--pr-dir', help='PR directory', default='prdata/')
parser.add_argument('-o', '--output-dir', help='Comparison root files output directory', default='dqmHistoComparisonOutput')
parser.add_argument('-j', '--nprocs', help='Number of processes', default=1, type=int)
parser.add_argument('-n', '--pr-number', help='PR number under test', required=True)
parser.add_argument('-t', '--test-number', help='Unique test number to distinguish different comparisons of the same PR.', default='1')
parser.add_argument('-r', '--release-format', help='Release format in this format: CMSSW_10_5_X_2019-02-17-0000', required=True)
parser.add_argument('-s', '--summary-dir', help='Directory where summary with all links will be saved', default='')
args = parser.parse_args()

collect_and_compare_files(args.base_dir, args.pr_dir, args.output_dir, args.nprocs, args.pr_number, args.test_number, args.release_format)
upload_to_gui(args.output_dir, args.nprocs)
generate_summary_html(args.output_dir, args.pr_number, args.summary_dir)