# QML-HEP GSoC 2024 Tasks

### Task II: Classical Graph Neural Network (GNN

ParticleNet’s data for Quark/Gluon jet classification available here with its corresponding description: https://zenodo.org/record/3164691#.YigdGt9MHrB

● Choose 2 Graph-based architectures of your choice to classify jets as being quarks or gluons. Provide a description on what considerations you have taken to project this point-cloud dataset to a set of interconnected nodes and edges. 

● Discuss the resulting performance of the 2 chosen architectures.

In [11]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow)
  Downloading tensorflow_intel-2.16.1-cp310-cp310-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading flatbuffers-24.3.7-py2.py3-none-any.whl.metadata (849 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading gast-0.5.4-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorfl

In [23]:
!pip install tf_keras

Collecting tf_keras
  Downloading tf_keras-2.16.0-py3-none-any.whl.metadata (1.6 kB)
Downloading tf_keras-2.16.0-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---- ----------------------------------- 0.2/1.7 MB 6.3 MB/s eta 0:00:01
   ---- ----------------------------------- 0.2/1.7 MB 6.3 MB/s eta 0:00:01
   ----------------- ---------------------- 0.7/1.7 MB 5.8 MB/s eta 0:00:01
   ---------------------------------------  1.7/1.7 MB 9.9 MB/s eta 0:00:01
   ---------------------------------------- 1.7/1.7 MB 8.4 MB/s eta 0:00:00
Installing collected packages: tf_keras
Successfully installed tf_keras-2.16.0


In [28]:
!wget https://raw.githubusercontent.com/hqucms/ParticleNet/master/tf-keras/tf_keras_model.py

'wget' is not recognized as an internal or external command,
operable program or batch file.


In [30]:
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
# from tf_keras import get_particle_net_lite, get_partic
# le_net
# import tf_kerasmodel
from tf_keras_model import get_particle_net_lite

In [12]:
import energyflow
from tensorflow import keras

In [1]:

qg_dataset = energyflow.qg_jets.load(num_data=100000, pad=True, ncol=4, generator='pythia',with_bc=False, cache_dir='~/.energyflow')

In [2]:
qg_dataset

(array([[[ 2.68769142e-01,  3.56903171e-01,  4.74138734e+00,
           2.20000000e+01],
         [ 1.60076377e-01, -2.55609533e-01,  4.55022910e+00,
           2.20000000e+01],
         [ 1.14868731e+00, -6.24380156e-02,  4.50385377e+00,
          -2.11000000e+02],
         ...,
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
           0.00000000e+00],
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
           0.00000000e+00],
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
           0.00000000e+00]],
 
        [[ 1.21266321e+00, -1.12853089e-01,  3.04708757e+00,
          -2.11000000e+02],
         [ 2.40893976e-01, -1.67174886e-02,  2.82705667e+00,
           2.20000000e+01],
         [ 1.02778452e-01, -8.58720522e-02,  3.04180579e+00,
           2.20000000e+01],
         ...,
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
           0.00000000e+00],
         [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
           0.00000

In [5]:
x = qg_dataset[0]
y = qg_dataset[1]

In [6]:
print(x.shape)
print(y.shape)

(100000, 139, 4)
(100000,)


In [13]:
y = keras.utils.to_categorical(y)
print(y.shape)

(100000, 2)


In [16]:
x, y = shuffle(x, y, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.30, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(
    x_test, y_test, test_size=0.5, random_state=42)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
print(x_val.shape, y_val.shape)

(70000, 139, 4) (70000, 2)
(15000, 139, 4) (15000, 2)
(15000, 139, 4) (15000, 2)


In [17]:
# mask
# find the maximum length vector in for each sample in 10000 samples with maximum number of non-zero values
mask_train = np.sum(x_train, axis=2)
print(mask_train.shape)
print(mask_train[0])

# make the array binary
mask_train = np.array(mask_train != 0, np.float32)
print(mask_train[0])

# reshape mask for a third axis
mask_train = mask_train.reshape(x_train.shape[0], x_train.shape[1], 1)
print(mask_train.shape)

(70000, 139)
[   29.20243269    27.50561143    30.26089051   220.41819997
    27.95042087    28.15969933   329.0687446     27.57628735
  -206.06003465   218.08468374    26.41602355    27.02195013
    31.79622486    26.59246689    26.88924931  -202.09822736
    26.71899363    31.23462352  -310.42681747    27.70451205
    30.7169403   -198.56554041  2233.77838649    36.42720615
    29.23621805  -187.38676832    33.46071738   268.17256202
    32.3521241  -2054.01856237   118.7019814     56.63953271
   151.53934524    57.07618763     0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.             0.             0.
     0.             0.

In [18]:
# mask
# find the maximum length vector in for each sample in 10000 samples with maximum number of non-zero values
mask_val = np.sum(x_val, axis=2)

# make the array binary
mask_val = np.array(mask_val != 0, np.float32)

# reshape mask for a third axis
mask_val = mask_val.reshape(x_val.shape[0], x_val.shape[1], 1)
print(mask_val.shape)

(15000, 139, 1)


In [19]:
# mask
# find the maximum length vector in for each sample in 10000 samples with maximum number of non-zero values
mask_test = np.sum(x_test, axis=2)

# make the array binary
mask_test = np.array(mask_test != 0, np.float32)

# reshape mask for a third axis
mask_test = mask_val.reshape(x_test.shape[0], x_test.shape[1], 1)
print(mask_test.shape)

(15000, 139, 1)


In [20]:
train_dataset = {
    'points': x_train[:, :, 1:3],
    'features': x_train,
    'mask': mask_train
}

test_dataset = {
    'points': x_test[:, :, 1:3],
    'features': x_test,
    'mask': mask_test
}

val_dataset = {
    'points': x_val[:, :, 1:3],
    'features': x_val,
    'mask': mask_val
}

In [21]:
shapes = {
    'points': x_train[:, :, 1:3].shape[1:],
    'features': x_train.shape[1:],
    'mask': mask_train.shape[1:]
}
shapes

{'points': (139, 2), 'features': (139, 4), 'mask': (139, 1)}

In [31]:
# particle net lite
num_classes = 2
model = get_particle_net_lite(num_classes, shapes)

# Training parameters
batch_size = 1024
epochs = 100

def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 10:
        lr *= 0.1
    elif epoch > 20:
        lr *= 0.01
    logging.info('Learning rate: %f' % lr)
    return lr

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy'])
model.summary()

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```
