-
Notifications
You must be signed in to change notification settings - Fork 2
/
runStatisticsChecks.py
executable file
·85 lines (65 loc) · 7.32 KB
/
runStatisticsChecks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python
from __future__ import print_function, division
import os, sys, argparse, subprocess, math
import ROOT
import stealthEnv, commonFunctions
ROOT.gROOT.SetBatch(ROOT.kTRUE)
ROOT.TH1.AddDirectory(ROOT.kFALSE)
inputArgumentsParser = argparse.ArgumentParser(description='Plot impacts of each nuisance parameter given a datacard.')
inputArgumentsParser.add_argument('--outputFolder', required=True, help='Path to folder in which to store output files.',type=str)
inputArgumentsParser.add_argument('--datacardTemplateParentFolderWithPrefix', required=True, help='Path to EOS folder (including xrootd prefix) from which to fetch datacard.',type=str)
inputArgumentsParser.add_argument('--datacardTemplateFileName', required=True, help='Name of datacard.',type=str)
inputArgumentsParser.add_argument('--identifier', required=True, help='Human-readable ID for the output.',type=str)
inputArguments = inputArgumentsParser.parse_args()
# If "identifier" contains anything other than a letter from the alphabet, it can't be used in a TeX macro
if not((inputArguments.identifier).isalpha()): sys.exit("ERROR: argument \"identifier\" can only contain letters.")
signal_strengths = [0, 1]
output_folders = {
0: "{oF}/{i}/asimov_signal0".format(oF=inputArguments.outputFolder, i=inputArguments.identifier),
1: "{oF}/{i}/asimov_signal1".format(oF=inputArguments.outputFolder, i=inputArguments.identifier)
}
output_signal_strength_identifiers = {
0: "signal_zero",
1: "signal_injected"
}
output_signal_strength_identifiers_tex = {
0: "SignalZero",
1: "SignalInjected"
}
for signal_strength in signal_strengths:
# Make sure output folder exists
if not(os.path.isdir(output_folders[signal_strength])): subprocess.check_call("mkdir -p {oF}".format(oF=output_folders[signal_strength]), shell=True, executable="/bin/bash")
# Step 1: Copy datacard locally
stealthEnv.execute_in_env(commandToRun="cd {oF} && xrdcp --nopbar --force --path {iF}/{f} {f}".format(oF=output_folders[signal_strength], iF=inputArguments.datacardTemplateParentFolderWithPrefix, f=inputArguments.datacardTemplateFileName))
# Need to add the line "shapes * * FAKE" to "fake" a shape analysis, because that is required by the FitDiagnostics tool
stealthEnv.execute_in_env(commandToRun="cd {oF} && sed -i '/number of nuisance parameters/a shapes * * FAKE' {f}".format(oF=output_folders[signal_strength], f=inputArguments.datacardTemplateFileName))
# Step 2: Run FitDiagnostics
stealthEnv.execute_in_env(commandToRun="cd {oF} && combine -M FitDiagnostics -t -1 --expectSignal {s} --robustFit 1 --rMin -10 -d {f}".format(oF=output_folders[signal_strength], s=signal_strength, f=inputArguments.datacardTemplateFileName))
# Step 3: Get best-fit value of r and write it out to a TeX file
r_bestfit, r_error_lo, r_error_hi = commonFunctions.get_r_from_fit_diagnostics_output(input_file_path="{oF}/fitDiagnostics.root".format(oF=output_folders[signal_strength]), printDebug=False)
output_tex_file_r_bestfit = open("{oF}/r_best_fit_{i}_{si}.tex".format(oF=output_folders[signal_strength], i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]), 'w')
output_tex_file_r_bestfit.write("\\providecommand{{\\BestFitR{i}{si}}}{{Best fit $r$: {r_bestfit:.4G}~~-{r_error_lo:.3f}/+{r_error_hi:.3f}~~(68\\% CL)}}\n".format(i=inputArguments.identifier, si=output_signal_strength_identifiers_tex[signal_strength], r_bestfit=r_bestfit, r_error_lo=r_error_lo, r_error_hi=r_error_hi))
output_tex_file_r_bestfit.close()
# Step 4: Get pulls of nuisances using the standard "diffNuisances.py" script
stealthEnv.execute_in_env(commandToRun="cd {oF} && python ${{CMSSW_BASE}}/src/HiggsAnalysis/CombinedLimit/test/diffNuisances.py -a fitDiagnostics.root > {oF}/diffNuisances_{i}_{si}_raw.txt 2>&1".format(oF=output_folders[signal_strength], i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]))
# Step 5: Write out output of diffNuisances verbatim into a TeX file
commonFunctions.write_diffNuisances_output_into_tex_file(input_file_path="{oF}/diffNuisances_{i}_{si}_raw.txt".format(oF=output_folders[signal_strength], i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]), output_file_path="{oF}/diffNuisances_{i}_{si}.tex".format(oF=output_folders[signal_strength], i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]))
# Step 6: Create Combine workspace from datacard
stealthEnv.execute_in_env(commandToRun="cd {oF} && text2workspace.py {f} -m 125".format(oF=output_folders[signal_strength], f=inputArguments.datacardTemplateFileName))
workspace_path = "{oF}/{f}".format(oF=output_folders[signal_strength], f=(inputArguments.datacardTemplateFileName).replace(".txt", ".root"))
print("Produced workspace at: {w}".format(w=workspace_path))
if not(os.path.exists(workspace_path)): sys.exit("ERROR: expected to find file at location {w}, but found none.".format(w=workspace_path))
# Step 7: do initial fit to parameters of interest (just signal strength in our case) with --doInitialFit
stealthEnv.execute_in_env(commandToRun="cd {oF} && combineTool.py -M Impacts -d {w} -m 125 --doInitialFit --robustFit 1 --expectSignal {s} -t -1 --rMin -10".format(oF=output_folders[signal_strength], w=workspace_path, s=signal_strength))
# Step 8: fit for each nuisance parameter with --doFits
# stealthEnv.execute_in_env(commandToRun="cd {oF} && combineTool.py -M Impacts -d {w} -m 125 --robustFit 1 --doFits --expectSignal {s} -t -1 --rMin -10 --parallel 12".format(oF=output_folders[signal_strength], w=workspace_path, s=signal_strength))
stealthEnv.execute_in_env(commandToRun="cd {oF} && combineTool.py -M Impacts -d {w} -m 125 --robustFit 1 --doFits --expectSignal {s} -t -1 --rMin -10".format(oF=output_folders[signal_strength], w=workspace_path, s=signal_strength))
# Step 9: Collate outputs and write impacts into a json file
stealthEnv.execute_in_env(commandToRun="cd {oF} && combineTool.py -M Impacts -d {w} -m 125 --expectSignal {s} -t -1 --rMin -10 -o impacts.json".format(oF=output_folders[signal_strength], w=workspace_path, s=signal_strength))
# Step 10: Make the impact plots
stealthEnv.execute_in_env(commandToRun="cd {oF} && plotImpacts.py -i impacts.json -o impacts_{i}_{si} --label-size 0.04 --left-margin 0.55 --height 500 --per-page 21".format(oF=output_folders[signal_strength], i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]))
# Step 11: Rerun FitDiagnostics with extra plots
stealthEnv.execute_in_env(commandToRun="cd {oF} && combine -M FitDiagnostics -t -1 --expectSignal {s} --robustFit 1 --rMin -10 --saveWithUncertainties --saveOverallShapes --numToysForShapes 200 --plots -d {f}".format(oF=output_folders[signal_strength], s=signal_strength, f=inputArguments.datacardTemplateFileName))
# Step 12: Save high-res versions of 2D correlation plots, and print important values
commonFunctions.print_and_save_high_res_correlations(input_file_path="{oF}/fitDiagnostics.root".format(oF=output_folders[signal_strength]), output_folder=output_folders[signal_strength], suffix="{i}_{si}".format(i=inputArguments.identifier, si=output_signal_strength_identifiers[signal_strength]), list_correlations_to_save=["correlation_b", "correlation_s", "correlation_bins_b", "correlation_bins_s"])
print("All done!")