# MPNN Replication

Replication of the MPNN algorithm used on the Higgs Self Coupling data for signal seperation

## Imports

In [20]:
import ROOT
from ROOT import TLorentzVector
from math import sqrt
import h5py
import numpy as np
import awkward as ak
import pandas as pd
import json
import tensorflow as tf

## Getting Data

Getting the data with the baseline cuts as required

### Accessor Functions

In [21]:
def getArraysFromFile(inputFile : str, debug = True):
    '''
    Accessor function that gives back lists from hdf5 file. Due to the various conversions from 
    List --> Awkward --> HDF5 file, this function is made as to make the conversion back to list 
    simple.
    Parameters :
    ------------
    inputFile : str, required
        The hdf5 file with the converted list
    Returns :
    ---------
    particleArray : list
        The list that contains the 4-momenta of every particle in every event.
    azArray : list
        The list that contains the azimuthal angle of every particle in every event.
    '''
    if debug:
        print("INFO : Started Getting Data From File")

    hf = h5py.File(inputFile,'r')
    partArray = hf.get("ParticleArray")
    azimuthalArray = hf.get("AzimuthalAngle")
    etaArray = hf.get("EtaAngle")
    phiArray = hf.get("PhiAngle")

    reconstitutedPartArray = ak.from_buffers(
        ak.forms.Form.fromjson(partArray.attrs["form"]),
        json.loads(partArray.attrs["length"]),
        {k: np.asarray(v) for k, v in partArray.items()},
    )

    reconstitutedAzAngle = ak.from_buffers(
        ak.forms.Form.fromjson(azimuthalArray.attrs["form"]),
        json.loads(azimuthalArray.attrs["length"]),
        {k: np.asarray(v) for k, v in azimuthalArray.items()},
    )

    reconstitutedEtaAngle = ak.from_buffers(
        ak.forms.Form.fromjson(etaArray.attrs["form"]),
        json.loads(etaArray.attrs["length"]),
        {k: np.asarray(v) for k, v in etaArray.items()},
    )

    reconstitutedPhiAngle = ak.from_buffers(
        ak.forms.Form.fromjson(phiArray.attrs["form"]),
        json.loads(phiArray.attrs["length"]),
        {k: np.asarray(v) for k, v in phiArray.items()},
    )
    
    particleArray = ak.to_list(reconstitutedPartArray)
    azArray = ak.to_list(reconstitutedAzAngle)
    etaArray = ak.to_list(reconstitutedEtaAngle)
    phiArray = ak.to_list(reconstitutedPhiAngle)

    if debug:
        print("INFO : Done Getting Data from File")
    return particleArray,azArray,etaArray,phiArray

In [22]:
## Hyperparameters

process  = ['ttbar','tWj','ttV','ttbarh','taubb','hh']

In [23]:
part = []
angle = []
for proc in process:
    part_temp,angle_temp,_,_ = getArraysFromFile('../../datasets/baseline/' + proc + '_1M.h5',debug=False)
    part.append(part_temp)
    angle.append(angle_temp)
    print('INFO : The number of particles in each datasets are ' + str(len(part_temp)))

INFO : The number of particles in each datasets are 5
INFO : The number of particles in each datasets are 43
INFO : The number of particles in each datasets are 71
INFO : The number of particles in each datasets are 508
INFO : The number of particles in each datasets are 9
INFO : The number of particles in each datasets are 3312


In [24]:
max_len = 0
for i in part:
    for j in i:
        if len(j) > max_len:
            max_len = len(j)

In [25]:
buffer_array = [0]*7
fin_array = part
for i in range(len(part)):
    for j in range(len(part[i])):
        #print(j)
        for l in range(max_len - len(part[i][j])):
           fin_array[i][j].append(buffer_array)

In [26]:
x_train = []
y_train = []

for i in range(len(fin_array) - 1):
    for j in range(len(fin_array[i])):
        x_train.append(fin_array[i][j])
        y_train.append(0)

for j in range(len(fin_array[-1])):
    x_train.append(fin_array[-1][j])
    y_train.append(1)

In [27]:
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

## Creating a custom rbf Layer

The layers required are 

s = sum(ReLU(W\[m;d\] + b))<br>
m = ReLU(W\[m\] + b)

This required a GNN, and is thus simpler on pyTorch Geometric

In [69]:
class GraphInitialize(tf.keras.Model):
    def __init__(self,units=32, input_dim=7):
        super(GraphInitialize, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        m = []
        for input in inputs:
            m.append(tf.math.maximum(tf.matmul(input, self.w) + self.b,0))
        
        m = np.asarray(m)
        return m

In [88]:
class GraphPropogate(tf.keras.Model):
    def __init__(self,units=32, input_dim=7):
        super(GraphPropogate, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        max_seq_len = inputs.shape[0]
        m = tf.TensorArray(tf.float32, size=max_seq_len)
        
        for i in tf.range(max_seq_len):
            m_temp = tf.matmul(input, self.w) + self.b
            m = m.write(i,m_temp)
        
        return m

In [89]:
graph_init = GraphPropogate()
y = graph_init(x_train)

ValueError: Attempt to convert a value (<bound method Kernel.raw_input of <ipykernel.ipkernel.IPythonKernel object at 0x7f52f3506040>>) with an unsupported type (<class 'method'>) to a Tensor.

In [72]:
y.shape, x_train.shape

((3948, 16, 32), (3948, 16, 7))

In [57]:
model = tf.keras.models.Sequential()

In [58]:
model.add(GraphInitialize())

In [73]:
model.compile(optimizer='adam', loss='binary_crossentropy')

In [74]:
model.fit(x_train,y_train)

InaccessibleTensorError: in user code:

    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:855 train_function  *
        return step_function(self, iterator)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:838 run_step  **
        outputs = model.train_step(data)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/engine/training.py:796 train_step
        loss = self.compiled_loss(
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:155 __call__
        losses = call_fn(y_true, y_pred)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:259 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/losses.py:1754 binary_crossentropy
        backend.binary_crossentropy(
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/keras/backend.py:5035 binary_crossentropy
        output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/util/dispatch.py:206 wrapper
        return target(*args, **kwargs)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/ops/clip_ops.py:111 clip_by_value
        t_min = math_ops.minimum(values, clip_value_max)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/ops/gen_math_ops.py:6121 minimum
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/framework/op_def_library.py:748 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py:597 _create_op_internal
        inp = self.capture(inp)
    /home/blizzard/.local/lib/python3.9/site-packages/tensorflow/python/framework/func_graph.py:642 capture
        raise errors.InaccessibleTensorError(

    InaccessibleTensorError: The tensor 'Tensor("sequential_8/graph_initialize_7/while/Maximum:0", shape=(16, 32), dtype=float32)' cannot be accessed here: it is defined in another function or code block. Use return values, explicit Python locals or TensorFlow collections to access it. Defined in: FuncGraph(name=sequential_8_graph_initialize_7_while_body_120033, id=139992127992304); accessed from: FuncGraph(name=train_function, id=139992123294048).
    
