In [1]:
import save_and_load
import importlib
import substructure
import numpy as np
import csv_decoder
from matplotlib import pyplot
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the stored VH and hj samples.
# load_binary's return value is unpacked into twelve names, alternating
# vh / hj for each kind of quantity, in exactly the order it returns them.
(
    vh_event_list, hj_event_list,
    vh_mass_list, hj_mass_list,
    vh_higgs, hj_higgs,
    vh_weight, hj_weight,
    vh_image_list, hj_image_list,
    vh_recluster_images, hj_recluster_images,
) = save_and_load.load_binary('vh-hj', name_1='vh', name_2='hj')

In [3]:
# Cluster 0.8 jets
# (presumably "0.8" is the jet radius used inside csv_decoder -- TODO confirm)

# Re-import csv_decoder so that edits made to the module on disk are picked up
# without restarting the kernel.
importlib.reload(csv_decoder)

# First pass: cluster the raw event lists of each sample.
vh_clustered = csv_decoder.cluster_event(vh_event_list)
hj_clustered = csv_decoder.cluster_event(hj_event_list)

# Second pass: recluster using the loaded higgs information. recluster_event
# returns a pair; the second element is later indexed per event and read for
# its .mass / .pt / .eta attributes (the "non-higgs" jet features).
vh_reclustered, vh_non_higgs_jets = csv_decoder.recluster_event(vh_clustered, vh_higgs)
hj_reclustered, hj_non_higgs_jets = csv_decoder.recluster_event(hj_clustered, hj_higgs)

In [8]:
# Extracting data for VH
# Reload substructure so the latest on-disk version of the module is used.
importlib.reload(substructure)

# Per-feature lists, filled in parallel with the per-event rows below.
vh_higgs_pt, vh_higgs_eta = [], []
vh_non_higgs_leading_m, vh_non_higgs_leading_pt, vh_non_higgs_leading_eta = [], [], []

# N2 is computed once for the whole sample and indexed per event in the loop.
vh_N2 = substructure.find_new_var_N_2(vh_reclustered,vh_clustered)

# High-level features used for training. Each row of vh_x holds, in order:
#   0: higgs mass
#   1: higgs pt
#   2: higgs eta
#   3: non-higgs mass
#   4: non-higgs pt
#   5: non-higgs eta
#   6: non-higgs n2
vh_x = []
for i in range(len(vh_event_list)):
    # Look up this event's higgs entry and non-higgs jet once.
    vh_higgs_i = vh_higgs[i]
    vh_jet_i = vh_non_higgs_jets[i]

    vh_higgs_pt.append(vh_higgs_i[0])
    vh_higgs_eta.append(vh_higgs_i[1])
    vh_non_higgs_leading_m.append(vh_jet_i.mass)
    vh_non_higgs_leading_pt.append(vh_jet_i.pt)
    vh_non_higgs_leading_eta.append(vh_jet_i.eta)

    vh_row = (
        vh_mass_list[i],
        vh_higgs_i[0],
        vh_higgs_i[1],
        vh_jet_i.mass,
        vh_jet_i.pt,
        vh_jet_i.eta,
        vh_N2[i],
    )
    vh_x.append(vh_row)

KeyboardInterrupt: 

In [None]:
# Extracting data for hj
# Per-feature lists, filled in parallel with the per-event rows below.
hj_higgs_pt = []
hj_higgs_eta = []
hj_non_higgs_leading_m = []
hj_non_higgs_leading_pt = []
hj_non_higgs_leading_eta = []

# Compute N2 ONCE for the whole hj sample, exactly as the VH cell does for
# vh_N2. Calling find_new_var_N_2 inside the loop would redo the full-sample
# computation for every event and would store its entire return value in each
# row instead of that event's own N2 entry.
hj_N2 = substructure.find_new_var_N_2(hj_reclustered,hj_clustered)

# High-level features used for training. Each row of hj_train holds, in order:
#   0: higgs mass
#   1: higgs pt
#   2: higgs eta
#   3: non-higgs mass
#   4: non-higgs pt
#   5: non-higgs eta
#   6: non-higgs n2
hj_train = []
for i in range(len(hj_event_list)):
    hj_higgs_pt.append(hj_higgs[i][0])
    hj_higgs_eta.append(hj_higgs[i][1])
    hj_non_higgs_leading_m.append(hj_non_higgs_jets[i].mass)
    hj_non_higgs_leading_pt.append(hj_non_higgs_jets[i].pt)
    hj_non_higgs_leading_eta.append(hj_non_higgs_jets[i].eta)

    hj_train.append((
        hj_mass_list[i],
        hj_higgs[i][0],
        hj_higgs[i][1],
        hj_non_higgs_jets[i].mass,
        hj_non_higgs_jets[i].pt,
        hj_non_higgs_jets[i].eta,
        hj_N2[i]))  # this event's N2 only, matching the layout of the VH rows

In [None]:
# A simple method splitting binary training data
# the first group of data (background) is automatically given classification 0,
# and the second group (signal) is automatically given classification 1
# the parameter rsplit specifies 2 cutting points
def split_data(background_image_list, signal_image_list, rsplit = np.array([0.5,0.75]),
               background_mass_list=None, signal_mass_list=None):
    """Split background and signal samples into train / validation / test sets.

    Each input is cut at the two fractional positions given by ``rsplit``
    (fractions of that input's own length), and the matching background and
    signal pieces are concatenated, background first.

    Parameters
    ----------
    background_image_list : array-like
        Background samples; every one is labelled 0.
    signal_image_list : array-like
        Signal samples; every one is labelled 1.
    rsplit : array-like of 2 floats
        Cutting points as fractions of the sample length. The default
        ``[0.5, 0.75]`` gives a 50% / 25% / 25% split.
    background_mass_list, signal_mass_list : array-like, optional
        Per-sample masses aligned with the corresponding sample list. Supply
        both or neither. When omitted, the returned mass entries are ``None``.

    Returns
    -------
    tuple
        ``(input_shape, x_train, y_train, mass_train, x_val, y_val, mass_val,
        x_test, y_test, mass_test)`` where ``input_shape`` is the shape of a
        single training sample (``x_train.shape[1:]``).

    Raises
    ------
    ValueError
        If ``rsplit`` does not hold exactly two cutting points, if only one of
        the mass lists is given, or if a mass list's length differs from that
        of its sample list.
    """
    # Accept lists/tuples as well as arrays for the cutting points.
    cut_fractions = np.asarray(rsplit, dtype=float)
    if cut_fractions.shape != (2,):
        raise ValueError("rsplit must contain exactly 2 cutting points")
    if (background_mass_list is None) != (signal_mass_list is None):
        raise ValueError("background_mass_list and signal_mass_list must be given together")

    def _three_way(samples):
        # Cut one sequence into its (train, val, test) pieces.
        samples = np.asarray(samples)
        return np.split(samples, (len(samples) * cut_fractions).astype(int))

    b_split = _three_way(background_image_list)
    s_split = _three_way(signal_image_list)

    # Features: background piece followed by the matching signal piece.
    x_train, x_val, x_test = [
        np.concatenate((b_part, s_part)) for b_part, s_part in zip(b_split, s_split)
    ]
    # Labels: 0 for every background sample, 1 for every signal sample.
    y_train, y_val, y_test = [
        np.concatenate((np.zeros(len(b_part)), np.ones(len(s_part))))
        for b_part, s_part in zip(b_split, s_split)
    ]

    if background_mass_list is None:
        mass_train = mass_val = mass_test = None
    else:
        if (len(background_mass_list) != len(background_image_list)
                or len(signal_mass_list) != len(signal_image_list)):
            raise ValueError("mass lists must have the same length as their sample lists")
        # Masses are cut at the same positions so they stay aligned with x_*.
        mass_train, mass_val, mass_test = [
            np.concatenate((b_part, s_part))
            for b_part, s_part in zip(_three_way(background_mass_list),
                                      _three_way(signal_mass_list))
        ]

    # Shape of one sample, i.e. everything but the leading sample axis.
    input_shape = x_train.shape[1:]

    return(input_shape,
           x_train,y_train,mass_train,
           x_val,y_val,mass_val,
           x_test,y_test,mass_test,
           )

In [None]:
# Creating classifier
# Weak learner: a decision tree limited to depth 5.
bdt_weak_learner = DecisionTreeClassifier(max_depth=5)

# Boosted decision tree: 200 boosting rounds over the weak learner.
# NOTE(review): algorithm="SAMME.R" appears to be deprecated in newer
# scikit-learn releases -- confirm against the installed version before upgrading.
bdt = AdaBoostClassifier(
    bdt_weak_learner,
    algorithm="SAMME.R",
    n_estimators=200,
)
