In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.utils import shuffle
from PIL import ImageFont
import datetime

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf

In [12]:
!rm -rf ./logs/

In [13]:
import tensorflow as tf
%load_ext tensorboard

In [3]:
files = ['ttbar','wmp','wpwm','zwpm','n2n2']

cs_lo_k = {
            'ttbar':988.57,
            'wmp'  :1.95*1e5,
            'wpwm' :124.31,
            'zwpm' :51.82,
            'n2n2' :1
          }

br_ratio = {
            'ttbar':0.67*(1-0.67)*2,
            'wmp'  :(1-0.67),
            'wpwm' :(1-0.67)*0.67*2,
            'zwpm' :0.7*(1-0.67),
            'n2n2' :1
          }

cs_nmg = {
         'ttbar':393.30,
         'wmp'  :7.865*1e4,
         'wpwm' :74.96,
         'zwpm' :14.28,
         'n2n2' :1
         }

cs_mg = {'ttbar':5.883,
          'wmp':111.5,
          'wpwm':0.944,
          'zwpm':0.2381,
          'n2n2':3.99*1e-4
        }

cs_pb = []
for f in files:
    cs_pb.append((cs_lo_k[f]*br_ratio[f]*cs_mg[f])/cs_nmg[f])

cs = [i*1e3 for i in cs_pb]
#k_f = [1.954,1.356,1.92,2.09,1.0]

cs_corr = {files[i] : cs[i] for i in range(len(files))}

In [27]:
list(cs_corr.values())

[6538.845366086956,
 91227.27272727272,
 692.2567850586979,
 199.5908264705882,
 0.399]

In [5]:
def get_res(x):
    res = np.zeros(shape=(x.shape[0],5))
    #print(x.shape[0],5)
    for i in range(len(x)):
        #print(i.x[i])
        res[i,x[i]] = 1
    
    return res

In [6]:
no_of_files = {'ttbar':0,
          'wmp':0,
          'wpwm':0,
          'zwpm':0,
          'n2n2':0
        }

In [7]:
df = []
for f in range(len(files)):
    con_df = []
    
    for i in range(1,53):
        try:
            con_df.append(pd.read_csv('~/neutrino/datasets/csvdata/' + files[f] + str(i) + '.csv'))
            no_of_files[files[f]] += 1
        except:
            pass
            #print("Not Here : ",files[f],i)
    
    df.append(pd.concat(con_df,ignore_index=True))
    df[-1]['type'] = f
    
    if files[f] == "n2n2":
        df[-1]['tag'] = 1
    else:
        df[-1]['tag'] = 0

In [8]:
df[-1].head()

Unnamed: 0,ptl,etal,energyl,ptj,etaj,energyj,massj,mjj,rjj,rjl,met,n21_1,n21_2,n32_1,n32_2,infl,drfl,type,tag
0,576.562,0.232894,592.269,459.681,0.248122,475.279,36.1316,379.243,1.14509,2.25305,889.067,0.190804,0.258472,0.517955,0.670891,769.506,1.86471,4,1
1,823.599,-0.4188,896.887,339.247,-0.696095,431.789,77.3122,1052.58,2.67178,2.07278,1930.33,0.197178,0.222316,0.468612,0.544453,1113.56,1.81712,4,1
2,354.758,-1.72193,1024.2,883.807,-0.817714,1200.2,98.775,1764.85,2.82048,0.922735,3256.57,0.328865,0.361102,0.40928,0.760725,1015.05,2.09248,4,1
3,858.275,0.386685,923.245,1258.81,0.184948,1285.29,112.015,1546.48,1.12283,2.55146,388.116,0.216487,0.234383,0.308864,0.722296,510.332,2.04593,4,1
4,590.04,-0.006999,590.055,811.602,0.44512,897.952,90.8938,818.864,1.16291,1.4955,1133.33,0.172456,0.329166,0.461283,0.696238,351.882,3.21196,4,1


In [9]:
dtset = pd.concat(df,ignore_index=True)
dtset = shuffle(dtset)
dtset['met'] = np.fabs(dtset['met'])

In [10]:
## Analysis Level Cuts
dtset = dtset[dtset['ptl'] >= 120.0][dtset['ptj'] >= 120.0][dtset['etaj'] <= 2.0][dtset['etaj'] >= -2.0]

  dtset = dtset[dtset['ptl'] >= 120.0][dtset['ptj'] >= 120.0][dtset['etaj'] <= 2.0][dtset['etaj'] >= -2.0]


In [11]:
train_len = int(0.8*len(dtset))
x_train = dtset.T[:-2].T[:train_len]
y_train = get_res(dtset['type'][:train_len].values)

x_test = dtset.T[:-2].T[train_len:]
y_test = get_res(dtset['type'][train_len:].values)

print('Shapes : ',x_train.shape,y_train.shape,x_test.shape,y_test.shape)

Shapes :  (9034704, 17) (9034704, 5) (2258676, 17) (2258676, 5)


In [16]:
backup_callback = tf.keras.callbacks.experimental.BackupAndRestore(backup_dir="/home2/kalp_shah/tmp/backup")

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [80]:
def significance(y_true, y_pred):
    y_pred = tf.argmax(y_pred,axis=1,output_type = 'int32')
    y_pred = tf.cast(y_pred,'float32')
    
    y_true = tf.argmax(y_true,axis=1,output_type = 'int32')
    y_true = tf.cast(y_true,'float32')
    
    cross_sec = [6538.845366086956,91227.27272727272,692.2567850586979,199.5908264705882,0.399]
    L = 3000
    
    # Signal
    values = tf.logical_and(tf.equal(y_true,tf.cast(4,'float32')), tf.equal(y_pred, tf.cast(4,'float32')))
    values = tf.cast(values,'float32')
    
    total_sum = tf.reduce_sum(tf.cast(tf.equal(y_true,tf.cast(4,'float32')),'float32'))
    ns = (tf.reduce_sum(values)*cross_sec[4]*L)/total_sum
    
    nb = 0
    for i in range(3):
        values = tf.logical_and(tf.equal(y_true,tf.cast(i,'float32')), tf.equal(y_pred, tf.cast(4,'float32')))
        values = tf.cast(values, 'float32')
        
        total_sum = tf.reduce_sum(tf.cast(tf.equal(y_true,tf.cast(i,'float32')),'float32'))
        nb += (tf.reduce_sum(values)*cross_sec[i]*L)/total_sum
    
    print(ns,nb)
    
    return ns/tf.sqrt(nb)

In [91]:
class Significance(tf.keras.metrics.Metric):

    def __init__(self, name='significane', **kwargs):
        super(Significance, self).__init__(name=name, **kwargs)
        self.significance = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.argmax(y_pred,axis=1,output_type = 'int32')
        y_pred = tf.cast(y_pred,self.dtype)

        y_true = tf.argmax(y_true,axis=1,output_type = 'int32')
        y_true = tf.cast(y_true,self.dtype)

        cross_sec = [6538.845366086956,91227.27272727272,692.2567850586979,199.5908264705882,0.399]
        L = 3000

        # Signal
        values = tf.logical_and(tf.equal(y_true,tf.cast(4,self.dtype)), tf.equal(y_pred, tf.cast(4,self.dtype)))
        values = tf.cast(values,self.dtype)

        total_sum = tf.reduce_sum(tf.cast(tf.equal(y_true,tf.cast(4,self.dtype)),self.dtype))
        ns = (tf.reduce_sum(values)*cross_sec[4]*L)/total_sum

        nb = 0
        for i in range(3):
            values = tf.logical_and(tf.equal(y_true,tf.cast(i,self.dtype)), tf.equal(y_pred, tf.cast(4,self.dtype)))
            values = tf.cast(values, self.dtype)

            total_sum = tf.reduce_sum(tf.cast(tf.equal(y_true,tf.cast(i,self.dtype)),self.dtype))
            nb += (tf.reduce_sum(values)*cross_sec[i]*L)/total_sum

        self.significance.assign(ns/tf.sqrt(nb))

    def result(self):
        return self.significance

In [92]:
model = Sequential()
input_shape = x_train.shape

from keras.layers.normalization.batch_normalization import BatchNormalization
model.add(Dense(10,activation = 'relu',input_dim = input_shape[1]))
model.add(BatchNormalization())
model.add(Dense(25,activation = 'relu',input_dim = 10))
model.add(BatchNormalization())
model.add(Dense(40,activation = 'relu',input_dim = 25))
model.add(BatchNormalization())
model.add(Dense(20,activation = 'relu',input_dim = 40))
model.add(BatchNormalization())
model.add(Dense(12,activation = 'relu',input_dim = 20))
model.add(BatchNormalization())
model.add(Dense(8,activation = 'relu',input_dim = 12))
model.add(BatchNormalization())
model.add(Dense(5,activation = 'softmax',input_dim = 8))

model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy',Significance()])

In [93]:
model.fit(x_train,y_train,epochs=100,batch_size=512,validation_split=0.2)#,class_weight={0:5,1:7,2:4,3:4,4:.005})

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

KeyboardInterrupt: 

In [94]:
tot_pred = model.predict(dtset.T[:-2].T)

In [95]:
def get_back_ax(x):
    return x.argmax(axis=1)

In [96]:
sol = get_back_ax(tot_pred)

In [97]:
sol.shape

(11293380,)

In [98]:
sol[2:100]

array([0, 1, 2, 3, 1, 4, 4, 3, 4, 4, 4, 0, 4, 3, 0, 0, 1, 0, 0, 3, 3, 3,
       3, 0, 2, 3, 3, 3, 4, 3, 4, 0, 0, 1, 3, 3, 3, 3, 4, 3, 0, 0, 4, 3,
       3, 1, 3, 3, 0, 0, 3, 0, 3, 3, 3, 1, 0, 3, 4, 4, 4, 4, 0, 3, 0, 1,
       3, 0, 0, 1, 1, 3, 0, 0, 3, 0, 0, 3, 3, 3, 3, 2, 0, 1, 3, 4, 3, 1,
       0, 0, 1, 0, 3, 0, 0, 0, 3, 4])

In [99]:
pred_set = dtset.copy()
pred_set['pred'] = sol

In [100]:
cor_pred = len(pred_set[train_len:][pred_set['pred'] == 4][pred_set['tag'] == 1]) + len(pred_set[train_len:][pred_set['pred'] != 4][pred_set['tag'] == 0])
print('The accuracy of the test set is : ',cor_pred/(len(pred_set[train_len:])))

  cor_pred = len(pred_set[train_len:][pred_set['pred'] == 4][pred_set['tag'] == 1]) + len(pred_set[train_len:][pred_set['pred'] != 4][pred_set['tag'] == 0])


The accuracy of the test set is :  0.9648395785849764


In [101]:
import warnings
warnings.filterwarnings('ignore')

In [102]:
print('Correctly identified signal (True Positive)     : ',len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 1]))
print('Falsely identified signal (False Positive)      : ',len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0]))
print('Correctly identified background (True Negative) : ',len(pred_set[pred_set['pred'] != 4][pred_set['tag'] == 0]))
print('Falsely identified background (False Negative)  : ',len(pred_set[pred_set['pred'] != 4][pred_set['tag'] == 1]))

Correctly identified signal (True Positive)     :  3095718
Falsely identified signal (False Positive)      :  272399
Correctly identified background (True Negative) :  7801107
Falsely identified background (False Negative)  :  124156


In [103]:
print('The amount of signal left is     :', len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 1])/len(df[-1]))
print('The amount of background left is :', len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0])/np.sum([len(i) for i in df[:-1]]))

The amount of signal left is     : 0.9304503176623131
The amount of background left is : 0.028291674105721722


In [104]:
print('Thus, the rate of correct signal prediction is : ',len(pred_set[pred_set['pred'] == 4][dtset['tag'] == 1])/(len(pred_set[pred_set['pred'] == 4])))

Thus, the rate of correct signal prediction is :  0.9191242465745697


In [105]:
L = 3000

In [106]:
ns = cs_corr['n2n2']*(len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 1])/(no_of_files['n2n2']*1e5))*L
print('n2n2',(len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 1])),cs_corr['n2n2'])
nb = 0

for i in range(len(files)-1):
    nb += cs_corr[files[i]]*(len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0][pred_set['type'] == i])/((no_of_files[files[i]]*1e5)))*L
    print(files[i],len(pred_set[pred_set['pred'] == 4][pred_set['tag'] == 0][pred_set['type'] == i]),cs_corr[files[i]])

n2n2 3095718 0.399
ttbar 25170 6538.845366086956
wmp 26712 91227.27272727272
wpwm 85232 692.2567850586979
zwpm 135285 199.5908264705882


In [107]:
print('The number of signal is :', ns)
print('The number of background is :', nb)
print('The significance is :',ns/np.sqrt(nb))

The number of signal is : 950.147293846154
The number of background is : 2029700.677461161
The significance is : 0.6669218365068351


In [43]:
df[2].head()

Unnamed: 0,ptl,etal,energyl,ptj,etaj,energyj,massj,mjj,rjj,rjl,met,n21_1,n21_2,n32_1,n32_2,infl,drfl,type,tag
0,136.238,-1.42766,300.323,129.439,0.128849,131.679,17.4703,60.5694,0.661161,1.59314,120.812,0.065091,0.234445,0.0,0.540246,25.6794,0.01788,2,0
1,177.032,1.32703,357.172,174.278,0.793436,233.272,23.5985,143.214,2.13823,0.725024,815.576,0.417532,0.721365,0.505062,0.539998,123.95,1.49283,2,0
2,207.788,1.245,390.733,496.299,1.04603,797.173,76.4529,850.102,2.10145,0.771494,1909.77,0.218092,0.3689,0.271128,0.504858,297.691,1.8243,2,0
3,153.798,0.165185,155.901,391.888,-0.474061,437.114,17.7209,769.629,2.88061,1.34304,727.674,0.0,0.20979,0.3831,0.39424,398.036,1.15835,2,0
4,163.467,0.292749,170.522,231.087,-0.338531,245.678,24.481,205.674,2.03972,0.758088,115.665,0.020697,0.095246,0.163543,0.641187,208.345,2.38173,2,0


In [44]:
model.save('/home/iiit/Datasets/Models/s5')

2022-03-28 13:33:39.000110: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


PermissionDeniedError: /home/blizzard; Permission denied