In [1]:
##-- Purpose: plot the DNN score of the 2016 and 2018 data and signal, in response to a comment on AN v4.
##-- The comparison was requested because the DNN is trained only on 2017.
##-- https://twiki.cern.ch/twiki/bin/view/CMS/HIG-21-014

In [2]:
import uproot 
from matplotlib import pyplot as plt
import os 
import numpy as np 

In [3]:
# Need 6 files: one data file per year with the DNN score evaluated by the multiclass DNN of AN v4,
# and one signal file per year evaluated by the same multiclass DNN of AN v4.

In [3]:
# Directory holding the ntuples produced with the multiclass DNN training named in the path.
d = "/eos/user/a/atishelm/ntuples/HHWWgg_DNN/MultiClassifier/HHWWyyDNN_WithHggFactor2-200Epochs-3ClassMulticlass_EvenSingleH_2Hgg_withKinWeightCut10_BalanceYields/"
# Full path of every entry in the directory whose name contains ".root".
files = ["%s/%s"%(d, f) for f in os.listdir(d) if ".root" in f]

years = ["2016", "2017", "2018"]

# Per-year input files, keyed by the year string.
data_paths = {year: '%s/Data_%s_HHWWggTag_0_MoreVars.root'%(d, year) for year in years}
sig_paths = {year: '%s/GluGluToHHTo2G2Qlnu_node_cHHH1_%s_HHWWggTag_0_MoreVars.root'%(d, year) for year in years}

# The plotting cells look the paths up through the module-level names
# data_<year> / sig_<year>. Publish those names explicitly instead of exec-ing
# generated source, which would break on a path containing a quote or backslash.
for year in years:
    globals()["data_%s"%(year)] = data_paths[year]
    globals()["sig_%s"%(year)] = sig_paths[year]

In [18]:
##-- Plot Data
##-- Overlays, for each year, the DNN score of data events outside the 115-135
##-- diphoton mass window, with each year's histogram scaled to the 2018 luminosity.

fig, ax = plt.subplots(figsize = (10, 10))
xmin, xmax, nbins = 0, 1, 70
bins = np.linspace(xmin, xmax, nbins + 1)

DNN_cut = 0.1 ##-- only keep events with DNN score greater than this value 
data_alpha = 1
##-- Per-year factors used to scale each year to the 2018 luminosity (same order as `years`)
lumi_rescale_vals = [59.4 / 35.9, 59.4 / 41.5, 1.]
year_colors = ['C0', 'C1', 'C2']

for iyear, year in enumerate(years):
    lumi_rescale_val = lumi_rescale_vals[iyear]
    color = year_colors[iyear]
    print("Plotting %s"%(year))

    ##-- Look up the module-level path variable (data_2016, ...) by name, without exec
    data_path = globals()["data_%s"%(year)]

    ##-- Read the two branches and close the file right away; only the data file is needed here
    with uproot.open(data_path) as data:
        data_tree = data['Data_13TeV_HHWWggTag_0_v1']
        data_evalDNN_HH = data_tree['evalDNN_HH'].array()
        data_CMS_hgg_mass = data_tree['CMS_hgg_mass'].array()

    ##-- Keep the sidebands only (mass outside 115-135) and apply the DNN score cut
    DATA_BLIND_MASK = (data_CMS_hgg_mass < 115) | (data_CMS_hgg_mass > 135)
    DATA_DNN_MASK = data_evalDNN_HH > DNN_cut
    DATA_MASK = DATA_BLIND_MASK & DATA_DNN_MASK
    data_evalDNN_HH = data_evalDNN_HH[DATA_MASK]

    ##-- To scale to 2018 luminosity: the same weight for every selected event
    weights = np.full(len(data_evalDNN_HH), lumi_rescale_val)

    binHeights_, bin_edges, _ = ax.hist(data_evalDNN_HH, bins = bins, weights = weights, label = "%s Data sideband"%(year), histtype = 'step', stacked = True, color = color, linewidth = 2, alpha = data_alpha)

##-- Build the legend once, after all years have been drawn
ax.legend(loc = 'upper center', fontsize = 20)
ax.tick_params(axis = 'both', labelsize = 20)
ax.set_xlabel("DNN score", fontsize = 20)
##-- NOTE(review): the histograms are luminosity-scaled, not unit-normalized - confirm this label is intended
ax.set_ylabel("Normalized Entries", fontsize = 20)
ax.set_xlim(DNN_cut,1)
ax.grid()

##-- Log
##-- (for the linear version, skip set_yscale and save to .../AN-20-165_v5/YearByYear_SLDNN_scores_Data.pdf)
ax.set_yscale('log')
##-- Lay out after the scale change so the final tick labels are accounted for
fig.tight_layout()
fig.savefig("/eos/user/a/atishelm/www/HHWWgg/AN-20-165_v5/YearByYear_SLDNN_scores_Data_log.pdf")

plt.close(fig)

print("DONE")


Plotting 2016
Plotting 2017
Plotting 2018
DONE


In [16]:
##-- Plot Signal
##-- Overlays, for each year, the unit-normalized (density) DNN score of signal
##-- events inside the 115-135 diphoton mass window.

fig, ax = plt.subplots(figsize = (10, 10))
xmin, xmax, nbins = 0, 1, 70
bins = np.linspace(xmin, xmax, nbins + 1)

DNN_cut = 0.1 ##-- only keep events with DNN score greater than this value 
sig_alpha = 1
year_colors = ['C0', 'C1', 'C2']

for iyear, year in enumerate(years):
    color = year_colors[iyear]
    print("Plotting %s"%(year))

    ##-- Look up the module-level path variable (sig_2016, ...) by name, without exec
    sig_path = globals()["sig_%s"%(year)]

    ##-- Get signal evalDNN scores; read the branches and close the file right away
    ##-- (only the signal file is needed here)
    with uproot.open(sig_path) as sig:
        sig_tree = sig['GluGluToHHTo2G2Qlnu_node_cHHH1_13TeV_HHWWggTag_0_v1']
        sig_evalDNN_HH = sig_tree['evalDNN_HH'].array()
        sig_CMS_hgg_mass = sig_tree['CMS_hgg_mass'].array()
        sig_weights = sig_tree['weight'].array()

    ##-- Mass window 115-135, DNN score cut, and drop events with |weight| >= 10
    SIGNAL_REGION = (sig_CMS_hgg_mass > 115) & (sig_CMS_hgg_mass < 135)
    SIGNAL_DNN_MASK = sig_evalDNN_HH > DNN_cut
    SIGNAL_WEIGHT_MASK = np.abs(sig_weights) < 10.
    SIGNAL_MASK = SIGNAL_REGION & SIGNAL_DNN_MASK & SIGNAL_WEIGHT_MASK
    sig_evalDNN_HH = sig_evalDNN_HH[SIGNAL_MASK]
    sig_weights = sig_weights[SIGNAL_MASK]

    ##-- density = True normalizes each year's weighted histogram to unit area
    ax.hist(sig_evalDNN_HH, bins = bins, label = "%s Signal"%(year), weights = sig_weights, density = True, histtype = 'step', color = color, linewidth = 2, alpha = sig_alpha)

##-- Build the legend once, after all years have been drawn
ax.legend(loc = 'upper center', fontsize = 20)
ax.tick_params(axis = 'both', labelsize = 20)
ax.set_xlabel("DNN score", fontsize = 20)
ax.set_ylabel("Normalized Entries", fontsize = 20)
ax.set_xlim(DNN_cut,1)
ax.grid()
fig.tight_layout()
fig.savefig("/eos/user/a/atishelm/www/HHWWgg/AN-20-165_v5/YearByYear_SLDNN_scores_sig.pdf")
plt.close(fig)

print("DONE")

Plotting 2016
Plotting 2017
Plotting 2018
DONE
