In [1]:
import numpy as np
import torch.optim as optim
import torch.nn as nn
from sklearn.datasets import make_moons
# Set numpy random seed for reproducibility
np.random.seed(42)
import time
import jetnet
from jetnet.datasets import JetNet
from jetnet.utils import jet_features

from jetnet_diffusion import *
from configs import *


    # Set device (use GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')


particle_data = np.load('datasets/jetnet/particle_data.npy')
print(f'particle_data.shape: {particle_data.shape}')

jet_data = np.load('datasets/jetnet/jet_data.npy')

print('using substructure')
if SUBSET is not None:
    jet_data = jet_data[:SUBSET]
    particle_data = particle_data[:SUBSET, :, :]

print(f'jet_data.shape: {jet_data.shape}')
print(f'particle_data.shape: {particle_data.shape}')
num_types = len(data_args["jet_type"])
print(f'num_types: {num_types}')
type_indices = {jet_type: JetNet.JET_TYPES.index(jet_type) for jet_type in data_args["jet_type"]}# these are special indices for gluon, top quark, and W boson jets
print(f'type_indices: {type_indices}') 

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector
  return torch._C._show_config()


Using device: cpu
particle_data.shape: (532369, 30, 3)
using substructure
jet_data.shape: (100, 4)
particle_data.shape: (100, 30, 3)
num_types: 3
type_indices: {'g': 0, 't': 2, 'w': 3}


  selected_observables_labels = ['$m^{rel}$', '$p_T^{rel}$', '$\eta^{rel}$']


In [2]:
particle_data

array([[[-8.72522220e-03,  2.35612951e-02,  4.32597429e-01],
        [-6.22524740e-03,  2.60625388e-02,  9.14876685e-02],
        [ 3.39764287e-03,  2.16538049e-02,  5.66903986e-02],
        ...,
        [ 2.08774790e-01, -2.04054296e-01,  3.19446228e-03],
        [-1.44715101e-01, -5.46260066e-02,  3.11212032e-03],
        [-1.30169660e-01,  1.03919894e-01,  3.04984534e-03]],

       [[ 6.96837679e-02, -6.71379417e-02,  1.76177040e-01],
        [ 8.00953805e-02, -6.08508959e-02,  1.26247332e-01],
        [ 9.02179852e-02, -6.43948615e-02,  1.00311011e-01],
        ...,
        [ 5.71837723e-02, -8.96491632e-02,  6.63618371e-03],
        [ 1.14411823e-01,  3.18141766e-02,  6.63276110e-03],
        [ 8.58477354e-02, -6.48238733e-02,  6.11719536e-03]],

       [[-6.60860017e-02,  1.04422897e-01,  1.54327020e-01],
        [-3.83191518e-02, -5.19351587e-02,  8.77668038e-02],
        [-8.39861706e-02,  9.06001776e-03,  5.59205078e-02],
        ...,
        [-1.17115505e-01,  6.92059025e-02,

In [6]:
jet_point_list=[]
for i in range(particle_data.shape[0]):
    jet_point_list.append(torch.tensor(particle_data[i]))

In [7]:
jet_point_list

[tensor([[-0.0087,  0.0236,  0.4326],
         [-0.0062,  0.0261,  0.0915],
         [ 0.0034,  0.0217,  0.0567],
         [-0.0012,  0.0261,  0.0437],
         [-0.0087,  0.0136,  0.0434],
         [-0.0051,  0.0273,  0.0386],
         [-0.0052,  0.0213,  0.0347],
         [-0.0005,  0.0205,  0.0324],
         [-0.0087,  0.0536,  0.0172],
         [ 0.1802, -0.2968,  0.0146],
         [ 0.2364, -0.2809,  0.0114],
         [-0.0187,  0.0636,  0.0113],
         [ 0.0038,  0.0311,  0.0110],
         [ 0.0089,  0.0234,  0.0101],
         [ 0.2038, -0.2541,  0.0100],
         [-0.0162, -0.0640,  0.0084],
         [-0.0012,  0.0311,  0.0080],
         [-0.0576, -0.0389,  0.0072],
         [-0.0262,  0.0261,  0.0066],
         [ 0.0213,  0.0487,  0.0066],
         [ 0.0143,  0.0286,  0.0060],
         [-0.0241,  0.0134,  0.0048],
         [ 0.0886, -0.0800,  0.0047],
         [ 0.2088, -0.3391,  0.0045],
         [ 0.0543, -0.0759,  0.0040],
         [ 0.0211, -0.3142,  0.0035],
         [-0

In [15]:
from torch_geometric.data import Data, Batch
from torch_geometric.nn import knn_graph

# Initialize lists to store all points and batch indices
points_list = []
batch_indices = []

# Loop over each jet to prepare data
for i, jet_points in enumerate(jet_point_list):
    # Append points to the points list
    points_list.append(jet_points)
    # Create batch indices for this jet and append
    num_points = jet_points.size(0)
    
    batch_indices.append(torch.full((num_points,), i, dtype=torch.long))


In [12]:
torch.full((data_list[0].size(0),), 1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1])

In [16]:
batch_indices

[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0]),
 tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1]),
 tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
         2, 2, 2, 2, 2, 2]),
 tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
         3, 3, 3, 3, 3, 3]),
 tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
         4, 4, 4, 4, 4, 4]),
 tensor([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
         5, 5, 5, 5, 5, 5]),
 tensor([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
         6, 6, 6, 6, 6, 6]),
 tensor([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
         7, 7, 7, 7, 7, 7]),
 tensor([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
         8, 8, 8, 8, 8, 8]),
 tensor([9

In [17]:
all_points = torch.cat(points_list, dim=0)  # Shape: [total_points, num_features]
batch = torch.cat(batch_indices)            # Shape: [total_points]


In [18]:
edge_index = knn_graph(all_points, k=10, batch=batch, loop=False)


In [19]:
data = Data(x=all_points, edge_index=edge_index, batch=batch)


In [20]:
data

Data(x=[3000, 3], edge_index=[2, 30001], batch=[3000])

In [21]:
data.x

tensor([[-0.0087,  0.0236,  0.4326],
        [-0.0062,  0.0261,  0.0915],
        [ 0.0034,  0.0217,  0.0567],
        ...,
        [-0.1980,  0.0298,  0.0076],
        [ 0.0370, -0.0340,  0.0074],
        [-0.1934,  0.1960,  0.0071]])

In [22]:
data.edge_index

tensor([[   1,    2,    3,  ..., 2987, 2994, 2984],
        [   0,    0,    0,  ..., 2999, 2999, 2999]])

In [26]:
from torch_geometric.data import DataLoader

loader = DataLoader(data, batch_size=32
                    )




In [27]:
for batch in loader:
    print(batch.x)
    break

KeyError: 0