# MPNN Replication

Replication of the MPNN algorithm used on the Higgs Self Coupling data for signal seperation

## Imports

In [1]:
import ROOT
from ROOT import TLorentzVector
from math import sqrt
import h5py
import numpy as np
import awkward as ak
import pandas as pd
import json

Welcome to JupyROOT 6.24/00


## Getting Data

Getting the data with the baseline cuts as required

### Accessor Functions

In [2]:
def getArraysFromFile(inputFile : str, debug = True):
    '''
    Accessor function that gives back lists from hdf5 file. Due to the various conversions from 
    List --> Awkward --> HDF5 file, this function is made as to make the conversion back to list 
    simple.
    Parameters :
    ------------
    inputFile : str, required
        The hdf5 file with the converted list
    Returns :
    ---------
    particleArray : list
        The list that contains the 4-momenta of every particle in every event.
    azArray : list
        The list that contains the azimuthal angle of every particle in every event.
    '''
    if debug:
        print("INFO : Started Getting Data From File")

    hf = h5py.File(inputFile,'r')
    partArray = hf.get("ParticleArray")
    azimuthalArray = hf.get("AzimuthalAngle")
    etaArray = hf.get("EtaAngle")
    phiArray = hf.get("PhiAngle")

    reconstitutedPartArray = ak.from_buffers(
        ak.forms.Form.fromjson(partArray.attrs["form"]),
        json.loads(partArray.attrs["length"]),
        {k: np.asarray(v) for k, v in partArray.items()},
    )

    reconstitutedAzAngle = ak.from_buffers(
        ak.forms.Form.fromjson(azimuthalArray.attrs["form"]),
        json.loads(azimuthalArray.attrs["length"]),
        {k: np.asarray(v) for k, v in azimuthalArray.items()},
    )

    reconstitutedEtaAngle = ak.from_buffers(
        ak.forms.Form.fromjson(etaArray.attrs["form"]),
        json.loads(etaArray.attrs["length"]),
        {k: np.asarray(v) for k, v in etaArray.items()},
    )

    reconstitutedPhiAngle = ak.from_buffers(
        ak.forms.Form.fromjson(phiArray.attrs["form"]),
        json.loads(phiArray.attrs["length"]),
        {k: np.asarray(v) for k, v in phiArray.items()},
    )
    
    particleArray = ak.to_list(reconstitutedPartArray)
    azArray = ak.to_list(reconstitutedAzAngle)
    etaArray = ak.to_list(reconstitutedEtaAngle)
    phiArray = ak.to_list(reconstitutedPhiAngle)

    if debug:
        print("INFO : Done Getting Data from File")
    return particleArray,azArray,etaArray,phiArray

In [3]:
## Hyperparameters

process  = ['ttbar','llbj','tWj','ttV','ttbarh','taubb','hh']

In [4]:
part = []
angle = []
for proc in process:
    part_temp,angle_temp,_,_ = getArraysFromFile('../../datasets/baselineCuts_100k/' + proc + '_100k.h5',debug=False)
    part.append(part_temp)
    angle.append(angle_temp)
    print('INFO : The number of particles in each datasets are ' + str(len(part_temp)))

INFO : The number of particles in each datasets are 0
INFO : The number of particles in each datasets are 0
INFO : The number of particles in each datasets are 4
INFO : The number of particles in each datasets are 9
INFO : The number of particles in each datasets are 51
INFO : The number of particles in each datasets are 1
INFO : The number of particles in each datasets are 352


In [5]:
max_len = 0
for i in part:
    for j in i:
        if len(j) > max_len:
            max_len = len(j)

In [6]:
buffer_array = [0]*7
fin_array = part
for i in range(len(part)):
    for j in range(len(part[i])):
        #print(j)
        for l in range(max_len - len(part[i][j])):
           fin_array[i][j].append(buffer_array)

In [7]:
x_train = []
y_train = []

for i in range(len(fin_array) - 1):
    for j in range(len(fin_array[i])):
        x_train.append(fin_array[i][j])
        y_train.append(0)

for j in range(len(fin_array[-1])):
    x_train.append(fin_array[-1][j])
    y_train.append(1)

In [8]:
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

## Creating a custom rbf Layer

The layers required are 

s = sum(ReLU(W\[m;d\] + b))<br>
m = ReLU(W\[m\] + b)

In [10]:
model = tf.keras.models.Sequential()

In [11]:
model.add(tf.keras.layers.ReLU())

In [12]:
model.compile(optimizer='rmsprop', loss='binary_crossentropy')

In [13]:
model.fit(x_train,y_train)

InvalidArgumentError:  Incompatible shapes: [32,10,7] vs. [32,1]
	 [[node binary_crossentropy/mul_1 (defined at <ipython-input-13-4719cf73997a>:1) ]] [Op:__inference_train_function_254]

Function call stack:
train_function
