<center><h1>Tuning a Mass-Parameterized Neural Network with Class Weights on SR2 with Feature Selection</h1>
John Ignacio R.M. Oct 2020</center>

In [1]:
# Ada library
ada_parent_dir = "../"
import sys
sys.path.append(ada_parent_dir)
from ada.data import read_dataset, split_dataset_by_mass, scores_per_class_weights, scores_per_mass
from ada.model import BinaryClassifierModel4 as BC4
from ada.plot import plot_confidence_matrix
import pandas as pd
import numpy as np

Using TensorFlow backend.
Welcome to JupyROOT 6.20/04


In [2]:
# Constants
# Seed forwarded (through scores_by_feature_set / get_feature_set_score) to split_dataset_by_mass.
seed = 420
# Directory handed to read_dataset as the data location.
source_path = "../../processed_data/xtohhOct2020"
# Directory handed to scores_per_class_weights.
# NOTE(review): presumably where trained models are saved/loaded — confirm in ada.data.
dest_path = "../../saved_models/oct_2020"
# Signal name prefix; each mass value is appended to it before calling read_dataset.
signal = "Xtohh"
# Background sample name passed to read_dataset.
bg = "Xtohh_background"
# Region and tag passed to read_dataset.
# NOTE(review): together these presumably select the "SR2" region named in the title — confirm.
region = "SR"
tag = 2
# Mass points; one read_dataset call is made per entry.
masses = [1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000]
# Five-feature subsets compared below under the keys "chi", "et" and "rf".
# NOTE(review): names suggest chi-squared / extra-trees / random-forest feature selection — confirm provenance.
chi = ['m_FJpt', 'm_hhm', 'm_DTpt', 'm_MET', 'm_bbttpt']
extra_trees = ['m_hhm', 'm_FJm', 'm_DTm', 'm_FJpt', 'm_MET']
random_forest = ['m_hhm', 'm_FJm', 'm_DTm', 'm_dRFJwDT', 'm_MET']
# The 14 feature columns shown by df.head(); used as the "all" baseline.
all_feats = [
    'm_FJpt', 'm_FJeta', 'm_FJphi', 'm_FJm', 'm_DTpt', 'm_DTeta', 'm_DTphi', 'm_DTm',
    'm_dPhiFTwDT', 'm_dRFJwDT', 'm_dPhiDTwMET', 'm_MET', 'm_hhm', 'm_bbttpt',
]

In [3]:
# Load one frame per mass point, then stack them into a single frame whose
# former outer index level becomes a regular "mass" column.
dfs = dict(
    (mass, read_dataset(source_path, f"{signal}{mass}", bg, region, tag))
    for mass in masses
)
df = (
    pd.concat(dfs, names=["mass", None])
    .reset_index(level=0)
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,mass,EventWeight,label,m_FJpt,m_FJeta,m_FJphi,m_FJm,m_DTpt,m_DTeta,m_DTphi,m_DTm,m_dPhiFTwDT,m_dRFJwDT,m_dPhiDTwMET,m_MET,m_hhm,m_bbttpt
0,1000,0.000457,1,498.5145,-0.536601,-2.818808,119716.39,447.70572,-1.163481,1.234735,97467.3,2.229642,2.316092,0.605876,71.70186,927.6991,419.14658
1,1000,0.000348,1,549.95325,-1.375417,1.734355,121275.48,302.2314,-1.598841,-1.238216,96785.6,2.97257,2.980955,-0.169232,84.82113,846.36835,257.1056
2,1000,0.000591,1,513.64594,0.340504,-2.402172,134068.61,362.13004,-0.013702,0.731264,100451.04,3.133436,3.153392,0.193749,81.10068,907.06665,151.55675
3,1000,0.00062,1,483.49622,-1.430659,-0.469333,111137.67,410.57037,-1.323875,2.663501,105842.83,3.132834,3.134654,0.164603,64.79518,918.6268,73.030174
4,1000,0.000562,1,456.22528,-0.444622,0.358149,165661.98,363.99133,-0.550266,-2.998335,94892.64,2.926702,2.928608,-0.206495,151.07983,853.5135,127.06724


In [4]:
def get_feature_set_score(BC, feature_set, signal_weight, bg_weight_list, th_list, dest_path, title, seed):
    """Score a single feature subset over the class-weight / threshold grid.

    Calls ``read_dataset`` once per entry of the notebook-level ``masses`` list,
    restricted to ``feature_set``, stacks the results into one frame with a
    ``mass`` column, splits it with ``split_dataset_by_mass`` and returns the
    output of ``scores_per_class_weights``.

    Relies on the notebook globals ``source_path``, ``signal``, ``bg``,
    ``region``, ``tag`` and ``masses``.

    Parameters
    ----------
    BC : model class forwarded to ``scores_per_class_weights`` (e.g. ``BC4``).
    feature_set : list of feature column names passed to ``read_dataset``.
    signal_weight : forwarded to ``scores_per_class_weights``.
    bg_weight_list : iterable of background weights, forwarded unchanged.
    th_list : iterable of thresholds, forwarded unchanged.
    dest_path : forwarded to ``scores_per_class_weights``.
    title : forwarded to ``scores_per_class_weights``.
    seed : seed forwarded to ``split_dataset_by_mass``.

    Returns
    -------
    Whatever ``scores_per_class_weights`` returns.
    """
    print("Features:", feature_set)
    dfs = {
        mass: read_dataset(source_path, signal + str(mass), bg, region, tag, features=feature_set)
        for mass in masses
    }
    df = pd.concat(dfs, names=["mass", None])
    # Turn the outer "mass" index level into a column and drop the per-file row index.
    df = df.reset_index(level=0).reset_index(drop=True)

    # NOTE(review): 0.5 / 0.3 / 0.2 are presumably the train / validation / test
    # fractions — confirm against split_dataset_by_mass.
    sets = split_dataset_by_mass(df, 0.5, 0.3, 0.2, seed, masses)

    # Use the caller-supplied model class rather than the BC4 global.
    return scores_per_class_weights(BC, sets, signal_weight, bg_weight_list, th_list, dest_path, title)

In [5]:
def scores_by_feature_set(BC, features, signal_weight, bg_weight_list, th_list, dest_path, seed):
    """Run ``get_feature_set_score`` for every named feature set and stack the results.

    Parameters
    ----------
    BC : model class forwarded to ``get_feature_set_score``.
    features : dict mapping a short name (e.g. ``"chi"``) to a list of feature columns.
    signal_weight, bg_weight_list, th_list, dest_path, seed :
        forwarded unchanged to ``get_feature_set_score``.

    Returns
    -------
    The per-feature-set results concatenated with ``pd.concat``; the dict keys
    become the outermost index level and the index levels are named
    ``feature_set``, ``bg_weight`` and ``th``.
    """
    per_set_scores = {
        # The title embeds the feature-set name so each set gets a distinct title.
        name: get_feature_set_score(
            BC, feature_set, signal_weight, bg_weight_list, th_list,
            dest_path, f"SR2_MP_{name}_Oct2020", seed,
        )
        for name, feature_set in features.items()
    }
    return pd.concat(per_set_scores, names=["feature_set", "bg_weight", "th"])

In [6]:
# Full grid: four feature sets x background weights 10..20 x the thresholds
# produced by np.arange(0.1, 0.9, 0.1); the signal weight is held at 10.
scores = scores_by_feature_set(
    BC4,
    {"chi": chi, "et": extra_trees, "rf": random_forest, "all": all_feats},
    10,
    range(10, 21),
    np.arange(0.1, 0.9, 0.1),
    dest_path,
    seed,
)
# Reduced grid kept for a quick check of the pipeline:
#scores_by_feature_set(BC4, {"chi": chi, "et": extra_trees}, 10, [10, 11], [0.1, 0.2], dest_path, seed)

Features: ['m_FJpt', 'm_hhm', 'm_DTpt', 'm_MET', 'm_bbttpt']
Loading bg weight: 10
Loading bg weight: 11
Training with bg weight: 12
Training with bg weight: 13
Training with bg weight: 14
Training with bg weight: 15
Training with bg weight: 16
Training with bg weight: 17
Training with bg weight: 18
Training with bg weight: 19
Training with bg weight: 20
Features: ['m_hhm', 'm_FJm', 'm_DTm', 'm_FJpt', 'm_MET']
Loading bg weight: 10
Loading bg weight: 11
Training with bg weight: 12
Training with bg weight: 13
Training with bg weight: 14
Training with bg weight: 15
Training with bg weight: 16
Training with bg weight: 17
Training with bg weight: 18
Training with bg weight: 19
Training with bg weight: 20
Features: ['m_hhm', 'm_FJm', 'm_DTm', 'm_dRFJwDT', 'm_MET']
Training with bg weight: 10
Training with bg weight: 11
Training with bg weight: 12
Training with bg weight: 13
Training with bg weight: 14
Training with bg weight: 15
Training with bg weight: 16
Training with bg weight: 17
Traini

In [7]:
# Display the score table, indexed by (feature_set, bg_weight, th).
scores

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,wavg
feature_set,bg_weight,th,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
chi,10,0.1,0.600793,0.917659,0.843292
chi,10,0.2,0.638059,0.921193,0.854742
chi,10,0.3,0.663071,0.923188,0.862140
chi,10,0.4,0.727068,0.931821,0.883766
chi,10,0.5,0.728853,0.928442,0.881599
...,...,...,...,...,...
all,20,0.4,0.922097,0.976542,0.963764
all,20,0.5,0.926112,0.977216,0.965222
all,20,0.6,0.931862,0.978248,0.967362
all,20,0.7,0.921544,0.974401,0.961996


In [14]:
# Ten best configurations ranked by column 0.
# NOTE(review): column 0 is presumably the score for label 0 — confirm against scores_per_class_weights.
scores.nlargest(n=10, columns=[0])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,wavg
feature_set,bg_weight,th,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
all,10,0.5,0.93253,0.97957,0.96853
all,20,0.6,0.931862,0.978248,0.967362
all,15,0.5,0.930409,0.978358,0.967104
all,14,0.6,0.929715,0.978392,0.966968
all,12,0.5,0.929177,0.978536,0.966952
all,12,0.6,0.929164,0.97796,0.966508
all,19,0.3,0.928962,0.978245,0.966679
all,14,0.7,0.928136,0.97695,0.965494
all,12,0.7,0.927577,0.976586,0.965084
all,10,0.6,0.927412,0.977559,0.96579


In [9]:
# Ten best configurations ranked by the "wavg" column.
# NOTE(review): "wavg" is presumably a weighted average of the per-class scores — confirm.
scores.nlargest(n=10, columns=["wavg"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,wavg
feature_set,bg_weight,th,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
all,10,0.5,0.93253,0.97957,0.96853
all,20,0.6,0.931862,0.978248,0.967362
all,15,0.5,0.930409,0.978358,0.967104
all,14,0.6,0.929715,0.978392,0.966968
all,12,0.5,0.929177,0.978536,0.966952
all,19,0.3,0.928962,0.978245,0.966679
all,12,0.6,0.929164,0.97796,0.966508
all,12,0.4,0.926378,0.9782,0.966038
all,10,0.6,0.927412,0.977559,0.96579
all,18,0.3,0.925512,0.97779,0.96552
