<center><h1> Getting Event IDs </h1>
John Ignacio R.M. Nov 2020</center>

In [1]:
# Libraries
import os
import pickle
import sys

import pandas as pd

# Make the local `ada` package importable from the parent directory
ada_parent_dir = "../"
sys.path.append(ada_parent_dir)
from ada.data import read_dataset, split_dataset, split_dataset_by_mass
from ada.model import BinaryClassifierModel4 as BC4
from ada.plot import plot_confidence_matrix

Using TensorFlow backend.
Welcome to JupyROOT 6.20/04


# Non-parameterized methods

In [2]:
# Constants
# -- reproducibility
seed = 420

# -- locations on disk (relative to this notebook)
source_path = "../../processed_data/xtohhOct2020"
dest_path = "../../saved_models/oct_2020"

# -- sample selection passed to read_dataset
signal, bg = "Xtohh2000", "Xtohh_background"
region, tag = "SR", 2  # presumably the signal region and a tag selection; confirm in ada.data

In [3]:
# Load the signal and background samples; includeEventNumber=True keeps the
# EventNumber column so each row can be traced back to its event.
df = read_dataset(
    source_path,
    signal,
    bg,
    region,
    tag,
    includeEventNumber=True,
)
df.head()

Unnamed: 0,EventWeight,label,EventNumber,m_FJpt,m_FJeta,m_FJphi,m_FJm,m_DTpt,m_DTeta,m_DTphi,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt
0,0.00102,1,46746,885.55927,0.104927,-2.448576,127604.3,623.2271,-0.73151,0.656939,85368.72,3.105514,3.216185,-0.192894,237.88724,1631.6702,263.69763
1,0.001117,1,47033,855.69904,0.313113,-1.859287,122862.9,449.37933,-0.306591,0.920655,55754.656,2.779941,2.848176,-0.019178,485.03333,1293.4028,463.51236
2,0.000925,1,47483,552.47845,-0.702952,-2.549875,109091.67,631.70496,1.020756,0.610602,76213.55,3.122708,3.566858,-0.742126,104.55922,1659.8483,80.00815
3,0.001158,1,42700,525.1059,-0.941433,-1.130162,82382.28,416.48105,0.103909,1.981465,58332.793,3.111627,3.282524,-0.130256,166.60722,1075.3092,109.524925
4,0.00112,1,43588,916.44147,-0.683193,1.804695,115936.01,677.1644,-0.495132,-1.274429,94135.96,3.079124,3.084862,0.416606,167.3503,1595.7611,244.28346


In [4]:
# Split the dataset with a fixed seed so the partition is reproducible.
# The three fractions are forwarded positionally, in the same order as before
# (presumably train / validation / test; confirm in ada.data).
split_fractions = (0.5, 0.3, 0.2)
sets = split_dataset(df, *split_fractions, seed)

In [5]:
# Display the training-split feature table stored under sets["x"]["train"]
sets["x"]["train"]

Unnamed: 0,m_FJpt,m_FJeta,m_FJphi,m_FJm,m_DTpt,m_DTeta,m_DTphi,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt
0,0.827847,-0.314893,1.368961,-0.191919,-1.574508,0.587112,-0.218562,-0.547542,-0.552870,-0.815764,-0.547898,1.811293,-1.593004,1.989317
1,-2.090397,1.152870,1.215035,0.008585,-0.848421,0.892328,-0.506509,-0.227130,0.722139,-0.301069,-0.146605,-1.554302,-2.616303,-1.425038
2,0.091487,-0.776562,-0.056368,-0.224844,0.024554,0.073132,-1.696344,-0.951311,0.035988,-0.456565,0.986953,0.336859,-0.356872,-0.081265
3,-2.111163,1.705717,-0.781418,-0.577296,-0.557163,-1.171468,0.940387,0.720116,0.341807,2.155725,-1.972810,-1.260696,0.131294,-1.576849
4,-1.701825,1.252580,0.127648,-0.022834,-1.494951,-1.258084,-1.577730,-0.323814,0.777901,1.751913,0.114206,-0.376900,-1.239552,-0.502106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4020,1.002168,-1.202891,1.159054,0.287033,1.504488,-1.424706,-0.720914,0.875852,-1.073042,-1.484756,0.646524,0.024455,0.917756,0.141872
4021,0.251713,-0.712676,-1.406567,0.165451,-1.582295,0.618638,0.304204,-0.959552,0.219210,0.036988,-0.809148,1.388175,-1.488102,1.407916
4022,1.111657,-0.889814,1.418946,0.615864,0.022252,-0.195299,-0.315099,-0.723037,0.579669,-0.201872,-0.370286,1.150312,0.148405,0.802472
4023,2.497307,-0.220446,-0.337409,2.783586,1.776812,0.540273,1.441045,0.079873,0.705435,-0.117171,0.312471,0.915044,2.558575,0.560754


In [6]:
# Display the training-split entry of sets["idx"]
# (presumably the EventNumber of each training row; confirm in ada.data)
sets["idx"]["train"]

array([   62599, 30878767,   121415, ...,   147935,   139413,    23673])

In [13]:
# Write the IDs of every entry in sets["idx"] to ./events/<name>_ids.txt,
# one ID per line.
# open(..., 'w') does not create missing folders, so make sure ./events exists
# before writing; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs("./events", exist_ok=True)
for dataset, ids in sets["idx"].items():
    with open(f"./events/{dataset}_ids.txt", 'w') as outfile:
        # `event_id` instead of `id`, so the built-in id() is not shadowed
        outfile.writelines(f"{event_id}\n" for event_id in ids)

# Parameterized methods

In [2]:
# Constants
# -- reproducibility
seed = 420

# -- locations on disk (relative to this notebook)
source_path = "../../processed_data/xtohhOct2020"
dest_path = "../../saved_models/oct_2020"

# -- sample selection; the signal name is a prefix completed with each mass
signal, bg = "Xtohh", "Xtohh_background"
region, tag = "SR", 2  # presumably the signal region and a tag selection; confirm in ada.data

# -- mass points appended to `signal` when reading the datasets
masses = [1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000]

In [3]:
# Read one dataset per mass point, keyed by the mass value
dfs = {
    mass: read_dataset(source_path, signal + str(mass), bg, region, tag, includeEventNumber=True)
    for mass in masses
}

# Stack the per-mass frames: the dict keys become an outer index level named
# "mass", which is then turned into a regular column before the remaining
# index is replaced by a fresh 0..n-1 range.
df = (
    pd.concat(dfs, names=["mass", None])
    .reset_index(level=0)
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,mass,EventWeight,label,EventNumber,m_FJpt,m_FJeta,m_FJphi,m_FJm,m_DTpt,m_DTeta,m_DTphi,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt
0,1000,0.000457,1,34641,498.5145,-0.536601,-2.818808,119716.39,447.70572,-1.163481,1.234735,97467.3,2.229642,2.316092,0.605876,71.70186,927.6991,419.14658
1,1000,0.000348,1,41471,549.95325,-1.375417,1.734355,121275.48,302.2314,-1.598841,-1.238216,96785.6,2.97257,2.980955,-0.169232,84.82113,846.36835,257.1056
2,1000,0.000591,1,39279,513.64594,0.340504,-2.402172,134068.61,362.13004,-0.013702,0.731264,100451.04,3.133436,3.153392,0.193749,81.10068,907.06665,151.55675
3,1000,0.00062,1,42277,483.49622,-1.430659,-0.469333,111137.67,410.57037,-1.323875,2.663501,105842.83,3.132834,3.134654,0.164603,64.79518,918.6268,73.030174
4,1000,0.000562,1,33144,456.22528,-0.444622,0.358149,165661.98,363.99133,-0.550266,-2.998335,94892.64,2.926702,2.928608,-0.206495,151.07983,853.5135,127.06724


In [4]:
# Split the combined frame per mass point with a fixed seed.
# The three fractions are forwarded positionally, in the same order as before
# (presumably train / validation / test; confirm in ada.data).
split_fractions = (0.5, 0.3, 0.2)
sets = split_dataset_by_mass(df, *split_fractions, seed, masses)

# Show the training-split feature table
sets["x"]["train"]

Unnamed: 0,mass,m_FJpt,m_FJeta,m_FJphi,m_FJm,m_DTpt,m_DTeta,m_DTphi,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt
0,-0.103132,-1.081378,-1.561519,0.770015,0.002674,-0.462546,-2.602616,-1.019711,-1.775361,0.155736,-0.306168,-2.107158,-1.030440,-0.975948,-1.042351
1,-1.627252,-1.854947,0.935572,1.193299,0.049201,-0.310186,-0.199290,-0.431959,1.217161,-0.280747,-0.373159,0.394894,-0.773211,-1.266130,-0.819643
2,0.277897,-0.007381,-1.590420,-1.081853,-0.264277,0.043084,0.021906,0.640364,0.084791,0.583685,0.651673,-0.407285,0.279459,0.265933,-0.270389
3,1.230472,0.835134,1.513247,0.276911,-0.197858,0.066589,-0.591119,-1.356283,-0.396464,-0.054032,0.846411,0.277642,0.631689,1.168457,0.806253
4,-0.484162,0.048727,0.912956,-0.547311,-0.577162,-0.230616,0.511062,1.147272,1.098089,0.369477,-0.377204,0.648391,-0.108955,-0.450609,0.058277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23232,0.277897,1.061254,-1.088178,-1.084860,-0.076494,-0.713517,-0.770491,0.651681,-0.986836,0.737047,-0.160921,-0.232235,1.982732,-0.354397,1.711138
23233,1.230472,0.394801,2.063056,0.462199,0.082994,1.187936,-0.137103,-1.345573,0.707605,-0.085580,1.016671,0.483154,-0.500179,1.818419,-0.561552
23234,1.230472,-0.369736,-1.404929,-1.301822,-0.689007,0.637324,0.965766,0.441196,0.526783,0.773105,1.679836,-3.854815,-1.198623,1.116445,-1.255485
23235,1.230472,1.615623,0.303707,-0.068053,-0.246743,0.849442,-0.160408,1.647579,-0.612600,0.669516,-0.184259,0.258870,2.050509,0.757219,0.985509


In [8]:
# Write the IDs of every entry in sets["idx"] to ./events/<name>_ids_MP.txt,
# one ID per line.
# open(..., 'w') does not create missing folders, so make sure ./events exists
# before writing; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs("./events", exist_ok=True)
for dataset, ids in sets["idx"].items():
    with open(f"./events/{dataset}_ids_MP.txt", 'w') as outfile:
        # `event_id` instead of `id`, so the built-in id() is not shadowed
        outfile.writelines(f"{event_id}\n" for event_id in ids)

In [9]:
# For each mass point, write sets[mass]["ids_test"] to
# ./events/test_ids_MP<mass>.txt, one ID per line.
# open(..., 'w') does not create missing folders, so make sure ./events exists
# before writing; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs("./events", exist_ok=True)
for mass in masses:
    with open(f"./events/test_ids_MP{mass}.txt", 'w') as outfile:
        # `event_id` instead of `id`, so the built-in id() is not shadowed
        outfile.writelines(f"{event_id}\n" for event_id in sets[mass]["ids_test"])