## Task 2

Set up a neural network to reconstruct `aco_angle_1` from basic variables.

In [1]:
# Install uproot with pip's --user option (per-user site-packages, not the system one).
!pip install --user uproot
import sys
# Add a per-user site-packages directory to the import path so the package
# installed above can be imported.
# NOTE(review): this path is hard-coded to one account and to python2.7 --
# presumably it is where `pip --user` installs on this machine; confirm
# before running under another user or Python version.
sys.path.append("/eos/home-m/acraplet/.local/lib/python2.7/site-packages")



In [2]:
import uproot 
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, roc_auc_score
import xgboost as xgb
import matplotlib as mpl
#mpl.use('Agg')
import matplotlib.pyplot as plt


In [3]:
# Open the ROOT file and keep a handle on the object stored under the
# "ntuple" key; every later cell reads its branches from `tree`.
tree = uproot.open(
    "/eos/user/d/dwinterb/SWAN_projects/Masters_CP/MVAFILE_GluGluHToTauTauUncorrelatedDecay_Filtered_tt_2018.root"
)["ntuple"]

In [12]:
# Branches to read from the ntuple.
# Every name appears exactly once: listing a branch twice makes it be read
# again and may leave repeated column labels in the resulting DataFrame,
# which breaks later column-based selections.
variables4 = ["ip_x_1","ip_y_1","ip_z_1",
              "ip_x_2","ip_y_2","ip_z_2",
              "aco_angle_1",
              "pi_E_1","pi_px_1","pi_py_1","pi_pz_1",
              "pi_E_2","pi_px_2","pi_py_2","pi_pz_2",
              "tau_decay_mode_1","tau_decay_mode_2",
              "mva_dm_1","mva_dm_2",
              "pi0_E_1","pi0_px_1","pi0_py_1","pi0_pz_1",
              "pi0_E_2","pi0_px_2","pi0_py_2","pi0_pz_2",
              "y_1_1","y_1_2",
              "rand","wt_cp_sm", "wt_cp_ps", "wt_cp_mm",
              "pt_1","pt_2",
              "met",
              "aco_angle_5", "aco_angle_7", "aco_angle_6",
              "ip_sig_1", "ip_sig_2",
              "deepTauVsJets_medium_1","deepTauVsJets_medium_2",
              "deepTauVsEle_vvloose_1","deepTauVsEle_vvloose_2",
              "deepTauVsMu_vloose_1","deepTauVsMu_vloose_2","trg_doubletau"
             ]

# Guard against a name being added twice again in the future.
assert len(variables4) == len(set(variables4)), "duplicate branch name in variables4"

# Read the requested branches into a pandas DataFrame (one row per entry).
df4 = tree.pandas.df(variables4)

# Keep only events where both taus have tau_decay_mode == 1 and mva_dm == 1.
# NOTE(review): presumably this selects one specific decay channel for both
# taus -- confirm which one against the ntuple documentation.
df4 = df4[
      (df4["tau_decay_mode_1"] == 1)
    & (df4["tau_decay_mode_2"] == 1)
    & (df4["mva_dm_1"] == 1)
    & (df4["mva_dm_2"] == 1)
]

# Build one sample per CP hypothesis by accepting an event when its "rand"
# value falls below half of the corresponding weight.
# NOTE(review): assumes rand is uniform in [0, 1) and the weights lie in
# [0, 2], so that wt/2 acts as an acceptance probability -- TODO confirm.
# The same event can pass both selections, so df_ps and df_sm may overlap.

# events accepted with the pseudoscalar weight (wt_cp_ps)
df_ps = df4[
      (df4["rand"] < df4["wt_cp_ps"]/2)
]

# events accepted with the scalar / SM weight (wt_cp_sm)
df_sm = df4[
      (df4["rand"] < df4["wt_cp_sm"]/2)
]


In [13]:
# Preview the first rows of df_sm (the frame selected with wt_cp_sm) as a sanity check.
df_sm.head()

Unnamed: 0_level_0,ip_x_1,ip_y_1,ip_z_1,ip_x_2,ip_y_2,ip_z_2,aco_angle_1,pi_E_1,pi_px_1,pi_py_1,...,y_1_2,ip_sig_1,ip_sig_2,deepTauVsJets_medium_1,deepTauVsJets_medium_2,deepTauVsEle_vvloose_1,deepTauVsEle_vvloose_2,deepTauVsMu_vloose_1,deepTauVsMu_vloose_2,trg_doubletau
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15,0.006546,0.000134,0.009108,-0.002586,0.00102,-0.000611,5.015406,3.912128,3.186334,0.086207,...,-0.339467,3.072476,2.24895,True,True,True,True,True,True,True
26,0.00236,-0.000295,-0.011431,0.004199,0.003093,0.014879,3.019532,20.389432,12.846308,15.714182,...,0.760691,3.107849,2.581801,True,True,True,True,True,True,True
55,-0.001563,0.000772,0.000324,-0.020295,0.027176,-0.00481,3.0577,77.92905,28.100279,42.522791,...,0.53436,1.000902,6.068112,False,False,True,True,True,True,False
56,0.001578,-0.002075,-0.002149,-0.004772,-0.002109,-0.002552,5.388603,72.600672,24.495848,-38.046234,...,-0.065211,1.192274,2.911133,True,True,True,True,True,True,True
58,0.02339,0.022899,0.078718,0.013788,0.009657,0.103421,5.058131,32.103182,11.478175,27.700438,...,-0.081182,6.564064,6.81689,False,False,True,True,True,True,False


In [14]:
# Build the target labels (y) and the feature frames (X2 and X1).

# Label convention: 1 for every row taken from df_sm, 0 for every row taken
# from df_ps.
y_sm = pd.DataFrame(np.ones(df_sm.shape[0]))
y_ps = pd.DataFrame(np.zeros(df_ps.shape[0]))

# The labels are stacked in the same order as the features below (df_sm
# first, then df_ps), so row i of y is the label of row i of X. This is not
# only about shape: the order is what ties each label to its event.
# ignore_index=True gives y a fresh 0..N-1 index, the same index X2 gets
# from reset_index(drop=True); without it the index would restart at 0 for
# the df_ps part and would not line up with the features.
y = pd.concat([y_sm, y_ps], ignore_index=True)
y.columns = ["class"]

# Features in the same row order as the labels.
X = pd.concat([df_sm, df_ps])

# Drop the columns that must not be used in training: the weights and "rand"
# (used above to build the two samples) and the selection/ID flags.
X2 = X.drop([
            "wt_cp_sm","wt_cp_ps","wt_cp_mm", "rand",
            "tau_decay_mode_1","tau_decay_mode_2","mva_dm_1","mva_dm_2",
            "deepTauVsJets_medium_1","deepTauVsJets_medium_2",
            "deepTauVsEle_vvloose_1","deepTauVsEle_vvloose_2",
            "deepTauVsMu_vloose_1","deepTauVsMu_vloose_2",
            "trg_doubletau",
           ], axis=1).reset_index(drop=True)

# Keep only the first occurrence of any repeated column label, so that each
# feature appears once even if the input frame carried the same column twice.
X2 = X2.loc[:, ~X2.columns.duplicated()].copy()

# Separate version of the features that contains only aco_angle_1, which is
# the most sensitive simple variable.
X1 = X2[["aco_angle_1"]].copy()

# Sanity checks: one label per event, and X1 really is a single column.
assert len(X2) == len(y), "features and labels have different lengths"
assert list(X1.columns) == ["aco_angle_1"], "X1 should contain only aco_angle_1"

# DataFrame.shape is (n_rows, n_columns); X1.columns lists the column names.
X1.shape