In [1]:
import time
t1 = time.time()

import math
import os
import ROOT
import uproot
import numpy as np
import pandas as pd
import h5py

Welcome to JupyROOT 6.20/04


In [2]:
##### load data #####
##### setting file path #####
sig_rootpath = "/data/Chen-Wang/gghh_highPT_250_200K/Events/run_01_decayed_1/tag_1_delphes_events.root"
ttbarBG_rootpath = "/data/Chen-Wang/ttbarBG_200K/Events/run_01/tag_1_delphes_events.root"
jjBG_rootpath = "/data/Chen-Wang/jjBG_200PT_200K/Events/run_01/tag_1_delphes_events.root"

##### branches read for every sample #####
# The position of a branch in this list is the column index used by the
# selection and image functions (e.g. column 8 is "FatJet", column 9 is
# "FatJet.Mass", column 10 is "Jet.BTag").
DELPHES_TREE = "Delphes;1"
EVENT_BRANCHES = [
    "Particle.Status",
    "Particle.PT",
    "Particle.Eta",
    "Particle.Phi",
    "FatJet.Particles",
    "FatJet.Eta",
    "FatJet.Phi",
    "FatJet.PT",
    "FatJet",
    "FatJet.Mass",
    "Jet.BTag",
]


def load_delphes_events(root_file, tree_name=DELPHES_TREE, branches=EVENT_BRANCHES):
    """Read the requested branches of one sample and return them event-major.

    Parameters
    ----------
    root_file : object returned by ``uproot.open``
    tree_name : key of the tree inside the file
    branches : branch names to read, in column order

    Returns
    -------
    Array whose first axis runs over events and whose second axis runs over
    ``branches`` (the branch-major stack is transposed).
    """
    tree = root_file[tree_name]
    columns = [tree[branch].array() for branch in branches]
    # Stack as (branch, event), then swap to (event, branch). The temporary
    # trailing axis mirrors the original reshape sequence exactly.
    stacked = np.expand_dims(columns, axis=-1)
    stacked = stacked.transpose((1,0,2))
    return np.squeeze(stacked, axis=(2,))


##### include signal data #####
sig_file = uproot.open(sig_rootpath)
sig_events = load_delphes_events(sig_file)
num_sig = len(sig_events)

##### include ttbarBG data #####
ttbarBG_file = uproot.open(ttbarBG_rootpath)
ttbarBG_events = load_delphes_events(ttbarBG_file)
num_ttbarBG = len(ttbarBG_events)

##### include jjBG data #####
jjBG_file = uproot.open(jjBG_rootpath)
jjBG_events = load_delphes_events(jjBG_file)
num_jjBG = len(jjBG_events)

t2 = time.time()
print("Time:", t2-t1)

Time: 196.51850152015686


In [3]:
##### helper functions #####
##### select events with at least 2 fat jets #####

def Fat_Jet_selection(events):
    """Keep only the events whose column 8 (the "FatJet" entry) is >= 2.

    `events` is a 2-D array with one row per event; the matching rows are
    returned in their original order.
    """
    has_two_fat_jets = events[:, 8] >= 2
    kept_rows = np.flatnonzero(has_two_fat_jets)
    return events[kept_rows]

##### select if M_jet > 50 GeV #####

def mass_selection(events):
    """Keep events whose first two entries of column 9 (FatJet.Mass) are not below 50.

    An event is rejected as soon as either of the two masses is < 50; a value
    of exactly 50 passes. Rows are returned in their original order.
    """
    passing = []
    for index, event in enumerate(events):
        jet_masses = event[9]
        first_mass, second_mass = jet_masses[0], jet_masses[1]
        if not (first_mass < 50) and not (second_mass < 50):
            passing.append(index)
    return events[passing]

##### select if Btag (small jet) >= 2 #####

def Btag_selection(events):
    """Keep events in which the entries of column 10 (Jet.BTag) sum to at least 2.

    Rows are returned in their original order.
    """
    tagged = [
        index
        for index in range(len(events))
        if np.sum(events[index][10]) >= 2    # column 10: Jet.BTag
    ]
    return events[tagged]

##### calculate X_HH #####

def X_HH(jet_mass1, jet_mass2):
    """Return the X_HH variable for a pair of jet masses.

    The two masses are ordered into (m1, m2) and combined as
    sqrt(((m1-124)/(0.1*m1))**2 + ((m2-115)/(0.1*m2))**2), with a small
    constant added to each denominator to avoid division by zero.

    NOTE(review): the ordering compares the *signed* offsets from 124, so m1
    is simply the lighter of the two jets. If "closest to 124" was intended,
    abs() is missing -- confirm before changing.
    """
    offset1 = jet_mass1 - 124
    offset2 = jet_mass2 - 124
    m1, m2 = (jet_mass1, jet_mass2) if offset1 < offset2 else (jet_mass2, jet_mass1)

    term1 = (m1-124)/(0.1*m1+0.00001)
    term2 = (m2-115)/(0.1*m2+0.00001)
    return np.sqrt(term1**2 + term2**2)

##### select X_HH < 10 #####

def X_HH_selection(events):
    """Keep events whose X_HH, computed from the first two entries of
    column 9 (FatJet.Mass), is below 10.

    Rows are returned in their original order.
    """
    passing = [
        index
        for index in range(len(events))
        if X_HH(events[index][9][0], events[index][9][1]) < 10
    ]
    return events[passing]

##### define figure generator #####

def PT_generator(event):
    """Build the 50x50 pT image of the constituents of the first two fat jets.

    Constituent (eta, phi) are taken relative to the first fat jet
    (event[5][0], event[6][0]) and binned on a [-5, 5] x [-5, 5] grid,
    weighting each constituent by its pT (event[1]).

    Parameters
    ----------
    event : one row of the event array; columns 1-6 are read
        (Particle.PT/Eta/Phi, FatJet.Particles, FatJet.Eta/Phi).

    Returns
    -------
    (50, 50) float array; after the transpose the first axis is phi and the
    second axis is eta.
    """
    grid_num = 50    # means n^2 points
    # Constituent references of both jets; 1 is subtracted to index the
    # particle arrays (presumably the references are 1-based -- TODO confirm).
    # `+` is kept as written: it concatenates if these are list-like.
    where_jet_particle = event[4][0]+event[4][1]
    where_jet_particle = np.array(where_jet_particle)-1
    particle_Eta = event[2][where_jet_particle] - event[5][0]
    particle_Phi = event[3][where_jet_particle] - event[6][0]
    # phi is periodic: wrap the difference into [-pi, pi) so constituents on
    # the other side of the +-pi seam are not pushed out of the image.
    particle_Phi = np.mod(particle_Phi + np.pi, 2*np.pi) - np.pi
    particle_PT = event[1][where_jet_particle]

    xedges = np.linspace(-5, 5, grid_num+1)    # Eta limit
    yedges = np.linspace(-5, 5, grid_num+1)    # Phi limit

    # One weighted 2-D histogram replaces the per-cell np.where scans.
    # PT[i][j] is the summed pT in eta-bin i and phi-bin j.
    PT, _, _ = np.histogram2d(particle_Eta, particle_Phi,
                              bins=[xedges, yedges], weights=particle_PT)

    return PT.T    ### after transpose: X=eta Y=phi

##### define leading jet generator #####

def _rotated_jet_image(jet_events, jet_index, grid_num=50, half_width=1.0, mask_half_width=4):
    """Build the centred and rotated pT image of one fat jet.

    Steps:
      1. take the constituents of fat jet `jet_index` and express their
         (eta, phi) relative to that jet's axis;
      2. bin their pT on a grid_num x grid_num grid spanning
         [-half_width, half_width] in both directions;
      3. find the hardest cell, blank a window around it, and find the
         hardest remaining cell;
      4. rotate all constituents by the angle between the direction
         (hardest -> second hardest) and (0, -1), then re-bin.

    Returns the (grid_num, grid_num) image transposed, so the first axis is
    the rotated phi and the second the rotated eta.

    NOTE(review): if nothing is left after blanking, the "second hardest"
    cell is simply the first empty cell, so the rotation angle is arbitrary
    for such jets. This matches the previous behaviour.
    """
    # 1 is subtracted to index the particle arrays (presumably the stored
    # references are 1-based -- TODO confirm).
    where_jet_particle = np.array(jet_events[4][jet_index])-1
    particle_Eta = jet_events[2][where_jet_particle] - jet_events[5][jet_index]    # centre on the jet axis
    particle_Phi = jet_events[3][where_jet_particle] - jet_events[6][jet_index]    # centre on the jet axis
    # phi is periodic: wrap the difference into [-pi, pi) so jets close to the
    # +-pi seam keep all their constituents inside the image.
    particle_Phi = np.mod(particle_Phi + np.pi, 2*np.pi) - np.pi
    particle_PT = jet_events[1][where_jet_particle]

    edges = np.linspace(-half_width, half_width, grid_num+1)    # same limits for Eta and Phi

    # PT[i][j]: summed pT in eta-bin i and phi-bin j.
    PT, _, _ = np.histogram2d(particle_Eta, particle_Phi,
                              bins=[edges, edges], weights=particle_PT)

    # First occurrence of the maximum, in row-major order.
    lead_eta, lead_phi = np.unravel_index(np.argmax(PT), PT.shape)
    # Clamp the window start at 0: a negative start would make the slice
    # empty, leaving the peak in place so it would be found again below.
    PT[max(lead_eta-mask_half_width, 0):lead_eta+mask_half_width,
       max(lead_phi-mask_half_width, 0):lead_phi+mask_half_width] = 0
    sub_eta, sub_phi = np.unravel_index(np.argmax(PT), PT.shape)

    eta_diff = (sub_eta-lead_eta)*(1/grid_num)     # each grid different (1/grid_num)
    phi_diff = (sub_phi-lead_phi)*(1/grid_num)     # each grid different (1/grid_num)
    # inner product with (0,-1); the small constant guards against 0/0
    cos_theta = (0*eta_diff + -1*phi_diff)/(np.sqrt(eta_diff**2 + phi_diff**2)+10**(-5))
    sin_theta = np.sqrt(1-cos_theta**2) if eta_diff>0 else -np.sqrt(1-cos_theta**2)

    particle_Eta_rot = cos_theta*particle_Eta + sin_theta*particle_Phi
    particle_Phi_rot = -sin_theta*particle_Eta + cos_theta*particle_Phi

    PT, _, _ = np.histogram2d(particle_Eta_rot, particle_Phi_rot,
                              bins=[edges, edges], weights=particle_PT)

    return PT.T

def leadingJ_generator(jet_events):
    """Return the centred, rotated 50x50 pT image of the first fat jet."""
    return _rotated_jet_image(jet_events, 0)

##### define sub-leading jet generator #####

def subleadingJ_generator(jet_events):
    """Return the centred, rotated 50x50 pT image of the second fat jet."""
    return _rotated_jet_image(jet_events, 1)

In [4]:
##### Selection before put into model #####

def apply_event_selections(events, label, blank_line_before=False):
    """Run the four selections in order and print the surviving count after each.

    Parameters
    ----------
    events : event array for one sample
    label : sample name used in the printed messages
    blank_line_before : start the report with an empty line

    Returns
    -------
    The events that pass all four selections.
    """
    header = "\nThere are totally" if blank_line_before else "There are totally"
    print(header, len(events), label, "events.")

    selection_steps = [
        (Fat_Jet_selection, "2 fat jet selection."),
        (mass_selection, "fat jet mass selection."),
        (Btag_selection, "2 BTag selection."),
        (X_HH_selection, "X_HH selection."),
    ]
    for select, description in selection_steps:
        events = select(events)
        print("There are", len(events), label, "events after", description)
    return events


sig_events = apply_event_selections(sig_events, "signal")
num_sig = len(sig_events)

ttbarBG_events = apply_event_selections(ttbarBG_events, "ttbarBG", blank_line_before=True)
num_ttbarBG = len(ttbarBG_events)

jjBG_events = apply_event_selections(jjBG_events, "jjBG", blank_line_before=True)
num_jjBG = len(jjBG_events)

There are totally 200000 signal events.
There are 174453 signal events after 2 fat jet selection.
There are 140271 signal events after fat jet mass selection.
There are 117438 signal events after 2 BTag selection.
There are 102879 signal events after X_HH selection.

There are totally 200000 ttbarBG events.
There are 123751 ttbarBG events after 2 fat jet selection.
There are 109522 ttbarBG events after fat jet mass selection.
There are 55248 ttbarBG events after 2 BTag selection.
There are 50065 ttbarBG events after X_HH selection.

There are totally 200000 jjBG events.
There are 123337 jjBG events after 2 fat jet selection.
There are 54308 jjBG events after fat jet mass selection.
There are 1702 jjBG events after 2 BTag selection.
There are 1033 jjBG events after X_HH selection.


In [None]:
##### generate image #####

def generate_images(events, checkpoint_every=20000):
    """Build the three images (full event, first jet, second jet) for every event.

    Parameters
    ----------
    events : event array for one sample
    checkpoint_every : print "check point" after every this many events;
        None disables the progress message

    Returns
    -------
    List with one [PT_generator, leadingJ_generator, subleadingJ_generator]
    triple per event.
    """
    images = []
    for index in range(len(events)):
        event = events[index]
        images.append([PT_generator(event), leadingJ_generator(event), subleadingJ_generator(event)])
        if checkpoint_every and index%checkpoint_every==checkpoint_every-1:
            print("check point")
    return images


sig_image = generate_images(sig_events)

print("Time:", time.time()-t1)

ttbarBG_image = generate_images(ttbarBG_events)

print("Time:", time.time()-t1)

# No progress message for this sample, as before.
jjBG_image = generate_images(jjBG_events, checkpoint_every=None)

print("Time:", time.time()-t1)

check point


In [None]:
##### output test data #####

# The context manager closes the file even if writing one of the datasets
# fails, so a partially written file is not left open.
with h5py.File('/data/Chen-Wang/test_data_image.h5', 'w') as h5f:
    h5f.create_dataset('sig_image', data=sig_image)
    h5f.create_dataset('ttbarBG_image', data=ttbarBG_image)
    h5f.create_dataset('jjBG_image', data=jjBG_image)