In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.utils import shuffle
from PIL import ImageFont

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
files = ['ttbar','wmp','wpwm','zwpm','n2n2']

cs_lo_k = {
            'ttbar':988.57,
            'wmp'  :1.95*1e5,
            'wpwm' :124.31,
            'zwpm' :51.82,
            'n2n2' :1
          }

br_ratio = {
            'ttbar':0.67*(1-0.67)*2,
            'wmp'  :(1-0.67),
            'wpwm' :(1-0.67)*0.67*2,
            'zwpm' :0.7*(1-0.67),
            'n2n2' :1
          }

cs_nmg = {
         'ttbar':393.30,
         'wmp'  :7.865*1e4,
         'wpwm' :74.96,
         'zwpm' :14.28,
         'n2n2' :1
         }

cs_mg = {'ttbar':2.558,
          'wmp':36.8,
          'wpwm':2.9*1e-1,
          'zwpm':7.33*1e-2,
          'n2n2':3.99*1e-4
        }

cs_pb = []
for f in files:
  cs_pb.append((cs_lo_k[f]*br_ratio[f]*cs_mg[f])/cs_nmg[f])

cs = [i*1e3 for i in cs_pb]
#k_f = [1.954,1.356,1.92,2.09,1.0]

cs_corr = {files[i] : cs[i] for i in range(len(files))}

In [12]:
cs_list = list(cs_corr.values())
cs_list

[2843.169547246376,
 30109.0909090909,
 212.66363100320171,
 61.44480294117646,
 0.399]

In [16]:
class NSignal(tf.keras.metrics.Metric):

    def __init__(self,cross_section=cs_list,name='Ns',**kwargs):
        super(NSignal, self).__init__(name=name, **kwargs)
        self.ns = self.add_weight(name='tp', initializer='zeros')
        self.cs = cross_section
        
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.cast(tf.argmax(y_true,axis=1),self.dtype)
        y_pred = tf.cast(tf.argmax(y_pred,axis=1),self.dtype)
        
        total = tf.equal(y_true, tf.cast(4,self.dtype))
        total = tf.cast(total, tf.bool)
        
        prediction = tf.equal(y_pred, tf.cast(4,self.dtype))
        prediction = tf.cast(prediction, tf.bool)
        
        signal = tf.logical_and(prediction,total)
        signal = tf.cast(signal,self.dtype)
        #signal = tf.multiply(signal,self.cs[-1])
        self.ns.assign_add(tf.reduce_sum(signal)*self.cs[-1])

    def result(self):
        return self.ns

In [17]:
class NBack(tf.keras.metrics.Metric):

    def __init__(self,cross_section=cs_list,name='Nb',**kwargs):
        super(NBack, self).__init__(name=name, **kwargs)
        self.nb = self.add_weight(name='tp', initializer='zeros')
        self.cs = cross_section
        
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.cast(tf.argmax(y_true,axis=1),self.dtype)
        y_pred = tf.cast(tf.argmax(y_pred,axis=1),self.dtype)
        
        comp_back = tf.cast(0,self.dtype)
        for i in range(len(self.cs) - 1):
            total = tf.equal(y_true, tf.cast(i,self.dtype))
            total = tf.cast(total, tf.bool)
        
            prediction = tf.equal(y_pred, tf.cast(4,self.dtype))
            prediction = tf.cast(prediction, tf.bool)
        
            back = tf.logical_and(prediction,total)
            back = tf.cast(back,self.dtype)
            #back = tf.multiply(back,self.cs[i])
        
            comp_back += tf.reduce_sum(back)*self.cs[i]

        self.nb.assign_add(tf.cast(comp_back,self.dtype))

    def result(self):
        return self.nb

In [18]:
model = load_model('/home2/kalp_shah/datasets/models/s5',
                   custom_objects={'NSignal':NSignal,'NBack':NBack})

In [33]:
df = []
for f in range(len(files)):
    con_df = []
    
    for i in range(11,15):
        con_df.append(pd.read_csv('~/datasets/comp/' + files[f] + str(i) + '.csv'))
    
    df.append(pd.concat(con_df,ignore_index=True))
    df[-1]['type'] = f
    
    if files[f] == "n2n2":
        df[-1]['tag'] = 1
    else:
        df[-1]['tag'] = 0

FileNotFoundError: [Errno 2] No such file or directory: '/home2/kalp_shah/datasets/comp/ttbar12.csv'

In [20]:
dtset = pd.concat(df,ignore_index=True)
dtset = shuffle(dtset)
dtset['met'] = np.fabs(dtset['met'])

In [21]:
## Analysis Level Cuts
dtset = dtset[dtset['ptl'] >= 120.0][dtset['ptj'] >= 120.0][dtset['etaj'] <= 2.0][dtset['etaj'] >= -2.0]

In [22]:
tot_pred = model.predict(dtset.T[:-2].T)

In [23]:
def get_back_ax(x):
    return x.argmax(axis=1)

In [25]:
sol = get_back_ax(tot_pred)

In [26]:
pred_set = dtset.copy()
pred_set['pred'] = sol

In [27]:
print('Correctly identified signal (True Positive)     : ',len(pred_set[pred_set['pred'] >= 0.5][pred_set['tag'] == 1]))
print('Falsely identified signal (Flase Positive)      : ',len(pred_set[pred_set['pred'] >= 0.5][pred_set['tag'] == 0]))
print('Correctly identified background (True Negative) : ',len(pred_set[pred_set['pred'] < 0.5][pred_set['tag'] == 0]))
print('Falsely identified background (False Negative)  : ',len(pred_set[pred_set['pred'] < 0.5][pred_set['tag'] == 1]))

Correctly identified signal (True Positive)     :  78816
Falsely identified signal (Flase Positive)      :  92671
Correctly identified background (True Negative) :  66735
Falsely identified background (False Negative)  :  2310


In [28]:
print('The amount of signal left is     :', len(pred_set[pred_set['pred'] >= 0.5][pred_set['tag'] == 1])/len(df[-1]))
print('The amount of background left is :', len(pred_set[pred_set['pred'] >= 0.5][pred_set['tag'] == 0])/np.sum([len(i) for i in df[:-1]]))

The amount of signal left is     : 0.9314770605337178
The amount of background left is : 0.48143780391504926


In [29]:
print('Thus, the rate of correct signal prediction is : ',len(pred_set[pred_set['pred'] >= 0.5][dtset['tag'] == 1])/(len(pred_set[pred_set['pred'] >= 0.5])))

Thus, the rate of correct signal prediction is :  0.4596033518575752


In [30]:
L = 3000

In [35]:
ns = cs_corr['n2n2']*(len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 1])/(1e5))*L
print('n2n2',cs_corr['n2n2'],(len(pred_set[pred_set['pred'] ==4][pred_set['tag'] == 1])))
nb = 0

for i in range(len(files)-1):
    nb += cs_corr[files[i]]*(len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0][pred_set['type'] == i])/((1e5)))*L
    print(files[i],len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0][pred_set['type'] == i]),cs_corr[files[i]])

n2n2 0.399 6133
ttbar 0 2843.169547246376
wmp 0 30109.0909090909
wpwm 0 212.66363100320171
zwpm 1 61.44480294117646


In [36]:
print('The number of signal is :', ns)
print('The number of background is :', nb)
print('The significance is :',ns/np.sqrt(nb))

The number of signal is : 73.41201000000001
The number of background is : 1.843344088235294
The significance is : 54.07093877973052
