## Required Packages

In [2]:
#import uproot
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from collections import namedtuple, defaultdict
#import open3d as o3d
import random
random.seed(42)
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import add_self_loops
from torchvision import transforms
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib
import json

## Reading and arranging data 

In [3]:
# Short function to read hdf5 file stored as a dict
def hdf5_to_dict(hdf5_file):
    """
    Build a nested Python dict from an HDF5 file object.

    Parameters
    ----------
    hdf5_file
        Object exposing ``.items()``; this notebook passes an open
        ``h5py.File``.

    Returns
    -------
    dict
        A fresh dict populated by ``_hdf5_to_dict(hdf5_file, ...)``.
    """
    contents = {}
    _hdf5_to_dict(hdf5_file, contents)
    return contents

def _hdf5_to_dict(group, dic):
    """
    Convert HDF5 group to dictionary recursively
    """
    for key, item in group.items():
        if isinstance(item, h5py.Group):
            subgroup = {}
            _hdf5_to_dict(item, subgroup)
            dic[key] = subgroup
        else:
            dic[key] = np.array(item)

In [4]:
# Cell features for 600 events, read fully into memory as a nested dict.
# Later cells look the events up as data_dict["data_0"] ... data_dict["data_599"].
with h5py.File('./cellFeatures_600evs.hdf5', 'r') as f:
    # Materialise the file contents while the handle is still open.
    data_dict = hdf5_to_dict(f)

In [5]:
# NOTE(review): the `with` block that opened `f` has already closed it, so
# this call looks redundant — confirm and drop this cell.
f.close()

In [6]:
# Re-key the 600 per-event entries of data_dict as data_0 ... data_599,
# inserted in event order so later positional slicing is well defined.
dynamic_variables = {
    f"data_{event}": data_dict[f"data_{event}"] for event in range(600)
}

In [8]:
# Stack the rows of all events into one 2-D array (one column per feature)
# so that the features can be scaled column-wise.
data_600 = np.concatenate(list(dynamic_variables.values()))

In [9]:
data_600.shape

(112591200, 8)

In [11]:
# Truth labels for every neighbour edge (one row per event).
# The handle is left open here; it is read and then closed explicitly in the
# following cells.
hf_neigbor_truth_600evs = h5py.File("./neigbor_truth_600evs.hdf5", 'r')

In [12]:
# Read the whole "neigbor_truth_600evs" dataset into memory ([:] copies it
# out of the file into a NumPy array).
neigbor_truth_600evs = hf_neigbor_truth_600evs.get("neigbor_truth_600evs")[:]

In [13]:
# The labels are in memory now, so the file handle can be released.
hf_neigbor_truth_600evs.close()

In [14]:
neigbor_truth_600evs.shape

(600, 1250242)

## Collecting indices of neighbour edges based on truth label (1, 9, 10, 0)

In [15]:
# Each neighbour pair carries one of four truth labels:
#    1 -> true (same-cluster cell pair)
#    9 -> fake/bkg (lone / non-participating cell pair)
#    0 -> fake/bkg (cluster-lone cell pair)
#   10 -> fake/bkg (cluster-cluster cell pair)
# For every event, record the positions along its label row for each label.

true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster = [], [], [], []

for event in range(600):
    event_labels = neigbor_truth_600evs[event]
    true.append(list(np.flatnonzero(event_labels == 1)))
    bkg_lone.append(list(np.flatnonzero(event_labels == 9)))
    bkg_cluster_lone.append(list(np.flatnonzero(event_labels == 0)))
    bkg_cluster_cluster.append(list(np.flatnonzero(event_labels == 10)))

## Collecting length of each edge (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)

In [16]:
# The per-event index lists are ragged, so record how many pairs of each type
# every event has; the minima decide how large a rectangular
# (events x samples) subsample can be.

true_len = [len(true[event]) for event in range(600)]
bkg_lone_len = [len(bkg_lone[event]) for event in range(600)]
bkg_cluster_lone_len = [len(bkg_cluster_lone[event]) for event in range(600)]
bkg_cluster_cluster_len = [len(bkg_cluster_cluster[event]) for event in range(600)]

## Sorting all length arrays in descending order

In [17]:
# Convert the four per-event count lists to NumPy arrays so they can be
# negated and fancy-indexed in the sorting cells.
true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len = (
    np.array(counts)
    for counts in (true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len)
)

In [18]:
# Order the 600 events by descending cluster-cluster pair count (the rarest
# class); negating the counts makes argsort return largest-first indices.
sorted_cluster_cluster_len_indices = np.argsort(-bkg_cluster_cluster_len)

In [19]:
# Apply the cluster-cluster ordering to the per-event counts of every pair type.
(
    true_len_sorted,
    bkg_lone_len_sorted,
    bkg_cluster_lone_len_sorted,
    bkg_cluster_cluster_len_sorted,
) = (
    counts[sorted_cluster_cluster_len_indices]
    for counts in (true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len)
)

In [20]:
# Apply the same cluster-cluster ordering to the per-event index lists
# themselves (plain Python lists, hence the explicit comprehension).
true_sorted, bkg_lone_sorted, bkg_cluster_lone_sorted, bkg_cluster_cluster_sorted = (
    [per_event[event] for event in sorted_cluster_cluster_len_indices]
    for per_event in (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
)

In [21]:
# Split the sorted counts: first 500 events -> train, remaining events -> test.
# NOTE(review): because the events are sorted by descending cluster-cluster
# count, the test part holds the events with the fewest such pairs.
true_len_sorted_500, true_len_sorted_100 = true_len_sorted[:500], true_len_sorted[500:]
bkg_lone_len_sorted_500, bkg_lone_len_sorted_100 = (
    bkg_lone_len_sorted[:500],
    bkg_lone_len_sorted[500:],
)
bkg_cluster_lone_len_sorted_500, bkg_cluster_lone_len_sorted_100 = (
    bkg_cluster_lone_len_sorted[:500],
    bkg_cluster_lone_len_sorted[500:],
)
bkg_cluster_cluster_len_sorted_500, bkg_cluster_cluster_len_sorted_100 = (
    bkg_cluster_cluster_len_sorted[:500],
    bkg_cluster_cluster_len_sorted[500:],
)

In [22]:
# Split the sorted per-event index lists the same way:
# first 500 events -> train, remaining events -> test.
true_sorted_500, true_sorted_100 = true_sorted[:500], true_sorted[500:]
bkg_lone_sorted_500, bkg_lone_sorted_100 = bkg_lone_sorted[:500], bkg_lone_sorted[500:]
bkg_cluster_lone_sorted_500, bkg_cluster_lone_sorted_100 = (
    bkg_cluster_lone_sorted[:500],
    bkg_cluster_lone_sorted[500:],
)
bkg_cluster_cluster_sorted_500, bkg_cluster_cluster_sorted_100 = (
    bkg_cluster_cluster_sorted[:500],
    bkg_cluster_cluster_sorted[500:],
)


In [22]:
len(bkg_cluster_cluster_sorted_500)

500

### Checking the minimum length of each type of edges (true, bkg_l, bkg_c_l, bkg_c_c)

In [23]:
min(true_len_sorted_500)

15451

In [24]:
min(bkg_lone_len_sorted_500)

858254

In [25]:
min(bkg_cluster_lone_len_sorted_500)

24187

In [26]:
min(bkg_cluster_cluster_len_sorted_500)

2101

### Sample sizes chosen from the minimum lengths — true: 15k, bkg_l: 6.5k, bkg_c_l: 6.5k, bkg_c_c: 2k

In [28]:
# Draw a fixed-size sample without replacement from every training event:
# 15000 true pairs plus 6500 + 6500 + 2000 background pairs, giving balanced
# signal/background rows. The four groups are sampled in the order listed, so
# the sequence of draws from `random` is the same as sampling them one by one.
true_15k, bkg_lone_6p5k, bkg_cluster_lone_6p5k, bkg_cluster_cluster_2k = (
    [random.sample(event_rows, size) for event_rows in rows_per_event]
    for rows_per_event, size in (
        (true_sorted_500, 15000),
        (bkg_lone_sorted_500, 6500),
        (bkg_cluster_lone_sorted_500, 6500),
        (bkg_cluster_cluster_sorted_500, 2000),
    )
)

In [29]:
# Every event now has the same number of samples per type, so the nested
# lists can become rectangular (events x samples) arrays.
true_15k, bkg_lone_6p5k, bkg_cluster_lone_6p5k, bkg_cluster_cluster_2k = (
    np.array(samples)
    for samples in (true_15k, bkg_lone_6p5k, bkg_cluster_lone_6p5k, bkg_cluster_cluster_2k)
)

In [30]:
true_15k.shape

(500, 15000)

In [31]:
bkg_lone_6p5k.shape

(500, 6500)

In [32]:
bkg_cluster_cluster_2k.shape

(500, 2000)

In [33]:
# Join the three background samples column-wise, one row per event.
bkg_total = np.hstack((bkg_lone_6p5k, bkg_cluster_lone_6p5k, bkg_cluster_cluster_2k))

In [34]:
bkg_total.shape

(500, 15000)

### Labelling for true/signal and fake/bkg as 1, 0

In [35]:
# Binary labels for the training sample: 0 for background pairs, 1 for true pairs.
bkg_truth = np.full((500, 15000), 0, dtype=int)
true_truth = np.full((500, 15000), 1, dtype=int)

In [36]:
# Labels in the same column layout as the indices: true block first, then background.
total_truth = np.hstack((true_truth, bkg_truth))

In [37]:
total_truth.shape

(500, 30000)

In [38]:
# Pair indices per event: the true samples followed by the background samples.
total_training_indices = np.hstack((true_15k, bkg_total))

In [39]:
total_training_indices.shape

(500, 30000)

In [40]:
# One independent permutation of the 30000 column positions per training
# event; the same permutation is later applied to the indices and to their
# labels so the two stay aligned.
#
# A dedicated seeded Generator is used because the notebook only seeds the
# stdlib `random` module; NumPy's global RNG is unseeded, which made the
# shuffle (and the files written from it) differ on every run.
_shuffle_rng = np.random.default_rng(42)
rand_index = [_shuffle_rng.permutation(30000) for _ in range(500)]

In [41]:
# Stack the 500 per-event permutations into one 2-D array.
rand_index = np.array(rand_index)

In [42]:
rand_index[0]

array([ 8281, 17923,   123, ..., 16188, 23927, 27431])

In [43]:
# Shuffle each event's pair indices with that event's own permutation.
total_training_indices_rand = [
    event_indices[order]
    for event_indices, order in zip(total_training_indices, rand_index)
]

In [44]:
# Shuffle the labels with the same per-event permutation, so every label
# stays attached to the pair index it describes.
total_truth_rand = [
    event_labels[order]
    for event_labels, order in zip(total_truth, rand_index)
]

In [45]:
# Stack the shuffled per-event index rows into one 2-D array.
total_training_indices_rand = np.array(total_training_indices_rand)

In [46]:
# Stack the shuffled per-event label rows into one 2-D array.
total_truth_rand = np.array(total_truth_rand)

In [47]:
total_truth_rand[0]

array([1, 0, 1, ..., 0, 0, 0])

In [48]:
# Persist the shuffled truth labels of the training events.
# Mode 'w' truncates any existing file of the same name.
with h5py.File('./truth_label_train_500evs.hdf5', 'w') as f: 
    dset = f.create_dataset("truth_label_train", data = total_truth_rand)

In [49]:
total_training_indices_rand.shape

(500, 30000)

In [32]:
# Load the neighbour pair table into memory; later cells index it with the
# per-event pair indices to obtain the two cell ids of each edge.
# The context manager closes the file even if reading the dataset raises,
# and indexing by name gives a clear KeyError if "neighbor_pair" is missing.
with h5py.File("./neighbor_pairs_unique_sorted.hdf5", 'r') as hf_neighbor_pairs_unique_sorted:
    neighbor_pairs_unique_sorted = hf_neighbor_pairs_unique_sorted["neighbor_pair"][:]

In [51]:
# Look up the cell pair behind every shuffled training index, event by event.
total_training_edges_rand = [
    neighbor_pairs_unique_sorted[event_indices]
    for event_indices in total_training_indices_rand
]

In [52]:
# Stack the per-event pair arrays into a single (events, pairs, 2) array.
total_training_edges_rand = np.array(total_training_edges_rand)

In [53]:
total_training_edges_rand.shape

(500, 30000, 2)

In [54]:
# Bidirectional ("BD") source list: every pair (a, b) contributes a then b,
# which is exactly the row-major flattening of each event's (pairs, 2) block.
# A reshape replaces the element-by-element Python loop; the values and their
# order are unchanged, and the result is already a NumPy array.
_train_pairs = np.asarray(total_training_edges_rand)
train_edge_source_BD = _train_pairs.reshape(_train_pairs.shape[0], -1).copy()

In [55]:
# Ensure the bidirectional source list is a 2-D NumPy array (events x entries).
train_edge_source_BD = np.array(train_edge_source_BD)

In [56]:
train_edge_source_BD.shape

(500, 60000)

In [57]:
# Bidirectional ("BD") destination list: every pair (a, b) contributes b then
# a, i.e. each pair reversed and then flattened row-major per event.
# Reversing the last axis and reshaping replaces the element-by-element
# Python loop; the values and their order are unchanged.
_train_pairs = np.asarray(total_training_edges_rand)
train_edge_dest_BD = _train_pairs[:, :, ::-1].reshape(_train_pairs.shape[0], -1)

In [58]:
# Ensure the bidirectional destination list is a 2-D NumPy array (events x entries).
train_edge_dest_BD = np.array(train_edge_dest_BD)

In [59]:
train_edge_dest_BD.shape

(500, 60000)

In [60]:
# Unidirectional ("noBD") edge lists: the source is the first cell of every
# pair and the destination is the second. Column slices replace the two
# element-by-element Python loops; values and order are unchanged.
_train_pairs = np.asarray(total_training_edges_rand)
train_edge_source_noBD = _train_pairs[:, :, 0].copy()
train_edge_dest_noBD = _train_pairs[:, :, 1].copy()

In [61]:
# Ensure both unidirectional edge lists are 2-D NumPy arrays (events x pairs).
train_edge_source_noBD = np.array(train_edge_source_noBD)
train_edge_dest_noBD = np.array(train_edge_dest_noBD)

In [62]:
train_edge_source_noBD.shape

(500, 30000)

In [63]:
train_edge_dest_noBD.shape

(500, 30000)

In [64]:
# Write each training edge array to its own HDF5 file, one dataset per file.
# Mode 'w' truncates any existing file of the same name.
for out_path, dataset_name, edge_array in (
    ('./train_edge_source_BD_500evs.hdf5', "train_edge_source_BD", train_edge_source_BD),
    ('./train_edge_dest_BD_500evs.hdf5', "train_edge_dest_BD", train_edge_dest_BD),
    ('./train_edge_source_noBD_500evs.hdf5', "train_edge_source_noBD", train_edge_source_noBD),
    ('./train_edge_dest_noBD_500evs.hdf5', "train_edge_dest_noBD", train_edge_dest_noBD),
):
    with h5py.File(out_path, 'w') as f:
        dset = f.create_dataset(dataset_name, data=edge_array)

# Scaling of features

In [65]:
# Event names and their feature arrays, both in insertion (event) order.
keys = [*dynamic_variables]
values = [*dynamic_variables.values()]

In [67]:
# Put the per-event feature arrays into the same descending cluster-cluster
# order that was applied to the edge index lists.
rearranged_values = [values[event] for event in sorted_cluster_cluster_len_indices]

In [68]:
# Pair the original key order with the re-ordered feature arrays.
# NOTE(review): `keys` is not permuted, so a key such as "data_0" no longer
# names the event its array came from; the next cell only relies on the value
# order — confirm nothing looks these entries up by key.
rearranged_dict = dict(zip(keys, rearranged_values))

In [69]:
# Training features: the first 500 events in sorted order, stacked row-wise.
data_500 = np.concatenate(list(rearranged_dict.values())[:500])

In [70]:
# Test features: the remaining events in the SAME sorted order used for the
# training features and for every train/test edge list.
# Taking them from the unsorted `dynamic_variables` (original events 500-599)
# selected different events than the test edge lists describe, and could
# include events that are already part of the training set.
data_100 = np.concatenate(list(rearranged_dict.values())[500:])

In [71]:
# Fit the min-max scaler on the training events only, scale them, and save
# the fitted scaler so the test events can be scaled with the same ranges.
scaler = MinMaxScaler().fit(data_500)
cellFeatures_trainS = scaler.transform(data_500)
scaler_filename = "./scaler_neighbor_data_500_sorted.save"
joblib.dump(scaler, scaler_filename)

['./scaler_neighbor_data_500_sorted.save']

In [72]:
cellFeatures_trainS[1]

array([0.58421445, 0.51289224, 0.16380877, 0.23466876, 0.52418755,
       0.26086957, 0.09700815, 0.15288134])

In [73]:
# Undo the row-wise stacking: 500 events x 187652 cells x 8 features.
cellFeatures_trainS_reshape = np.reshape(cellFeatures_trainS, (500, 187652, 8))

In [74]:
# Reload the scaler that was fitted on the training events and apply it to
# the test events, so both sets are scaled with the same per-feature ranges.
# NOTE(review): joblib.load unpickles the file — only load scaler files you
# created yourself. The in-memory `scaler` is presumably equivalent here.
scaler = joblib.load('./scaler_neighbor_data_500_sorted.save') 
cellFeatures_testS = scaler.transform(data_100)

In [75]:
# Undo the row-wise stacking: 100 events x 187652 cells x 8 features.
cellFeatures_testS_reshape = np.reshape(cellFeatures_testS, (100, 187652, 8))

In [76]:
# Write the scaled training cell features (events x cells x features) to disk.
# Mode 'w' truncates any existing file of the same name.
with h5py.File('./cellFeaturesScaled_train_500evs.hdf5', 'w') as f: 
    dset = f.create_dataset("cellFeatures_trainS", data = cellFeatures_trainS_reshape)

In [77]:
# Write the scaled test cell features (events x cells x features) to disk.
# Mode 'w' truncates any existing file of the same name.
with h5py.File('./cellFeaturesScaled_test_100evs.hdf5', 'w') as f: 
    dset = f.create_dataset("cellFeatures_testS", data = cellFeatures_testS_reshape)

### for test sample

In [23]:
# Per-event pair counts of the test events (everything after the first 500
# in sorted order); their minima bound the test sample sizes chosen below.
(
    true_len_sorted_100,
    bkg_lone_len_sorted_100,
    bkg_cluster_lone_len_sorted_100,
    bkg_cluster_cluster_len_sorted_100,
) = (
    counts[500:]
    for counts in (
        true_len_sorted,
        bkg_lone_len_sorted,
        bkg_cluster_lone_len_sorted,
        bkg_cluster_cluster_len_sorted,
    )
)

In [24]:
min(true_len_sorted_100)

9692

In [25]:
min(bkg_lone_len_sorted_100)

1156244

In [26]:
min(bkg_cluster_lone_len_sorted_100)

17994

In [27]:
min(bkg_cluster_cluster_len_sorted_100)

651

In [29]:
# Draw a fixed-size sample without replacement from every test event. Each
# size sits just below the smallest per-event count of its pair type. The
# four groups are sampled in the order listed, so the sequence of draws from
# `random` is the same as sampling them one by one.
true_9k, bkg_lone_30k, bkg_cluster_lone_17k, bkg_cluster_cluster_600 = (
    [random.sample(event_rows, size) for event_rows in rows_per_event]
    for rows_per_event, size in (
        (true_sorted_100, 9000),
        (bkg_lone_sorted_100, 30000),
        (bkg_cluster_lone_sorted_100, 17000),
        (bkg_cluster_cluster_sorted_100, 600),
    )
)

In [33]:
# Look up the cell pair behind every sampled test index, kept separate per
# pair type so each type can be evaluated on its own.
true_test_edges = [neighbor_pairs_unique_sorted[picked] for picked in true_9k]
bkg_lone_test_edges = [neighbor_pairs_unique_sorted[picked] for picked in bkg_lone_30k]
bkg_cluster_lone_test_edges = [
    neighbor_pairs_unique_sorted[picked] for picked in bkg_cluster_lone_17k
]
bkg_cluster_cluster_test_edges = [
    neighbor_pairs_unique_sorted[picked] for picked in bkg_cluster_cluster_600
]

In [34]:
# Stack the per-event true pair arrays into one (events, pairs, 2) array.
true_test_edges = np.array(true_test_edges)

In [35]:
true_test_edges.shape

(100, 9000, 2)

In [36]:
# Bidirectional source list for the true test pairs: every pair (a, b)
# contributes a then b, i.e. the row-major flattening of each event's
# (pairs, 2) block. A reshape replaces the element-by-element Python loop;
# values and order are unchanged and the result is a NumPy array.
_test_pairs = np.asarray(true_test_edges)
test_edge_source_true_BD = _test_pairs.reshape(_test_pairs.shape[0], -1).copy()

In [37]:
# Bidirectional destination list for the true test pairs: every pair (a, b)
# contributes b then a, i.e. each pair reversed and flattened per event.
# Reversing the last axis and reshaping replaces the element-by-element
# Python loop; values and order are unchanged.
_test_pairs = np.asarray(true_test_edges)
test_edge_dest_true_BD = _test_pairs[:, :, ::-1].reshape(_test_pairs.shape[0], -1)

In [38]:
# Bidirectional source list for the lone-background test pairs: every pair
# (a, b) contributes a then b. The per-event pair arrays all have the same
# shape, so they are stacked and flattened per event instead of being walked
# element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_lone_test_edges)
test_edge_source_bkg_lone_BD = _test_pairs.reshape(_test_pairs.shape[0], -1).copy()

In [39]:
# Bidirectional destination list for the lone-background test pairs: every
# pair (a, b) contributes b then a. The per-event pair arrays are stacked,
# reversed along the last axis and flattened per event instead of being
# walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_lone_test_edges)
test_edge_dest_bkg_lone_BD = _test_pairs[:, :, ::-1].reshape(_test_pairs.shape[0], -1)

In [40]:
# Bidirectional source list for the cluster-lone background test pairs:
# every pair (a, b) contributes a then b. The per-event pair arrays are
# stacked and flattened per event instead of being walked element by
# element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_lone_test_edges)
test_edge_source_bkg_cluster_lone_BD = _test_pairs.reshape(_test_pairs.shape[0], -1).copy()

In [41]:
# Bidirectional destination list for the cluster-lone background test pairs:
# every pair (a, b) contributes b then a. The per-event pair arrays are
# stacked, reversed along the last axis and flattened per event instead of
# being walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_lone_test_edges)
test_edge_dest_bkg_cluster_lone_BD = _test_pairs[:, :, ::-1].reshape(_test_pairs.shape[0], -1)

In [42]:
# Bidirectional source list for the cluster-cluster background test pairs:
# every pair (a, b) contributes a then b. The per-event pair arrays are
# stacked and flattened per event instead of being walked element by
# element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_cluster_test_edges)
test_edge_source_bkg_cluster_cluster_BD = _test_pairs.reshape(_test_pairs.shape[0], -1).copy()

In [43]:
# Bidirectional destination list for the cluster-cluster background test
# pairs: every pair (a, b) contributes b then a. The per-event pair arrays
# are stacked, reversed along the last axis and flattened per event instead
# of being walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_cluster_test_edges)
test_edge_dest_bkg_cluster_cluster_BD = _test_pairs[:, :, ::-1].reshape(_test_pairs.shape[0], -1)

In [44]:
# Unidirectional edge lists for the true test pairs: the source is the first
# cell of every pair and the destination is the second. Column slices
# replace the two element-by-element Python loops; values and order are
# unchanged.
_test_pairs = np.asarray(true_test_edges)
test_edge_source_true_noBD = _test_pairs[:, :, 0].copy()
test_edge_dest_true_noBD = _test_pairs[:, :, 1].copy()

In [45]:
# Unidirectional edge lists for the lone-background test pairs: the source is
# the first cell of every pair and the destination is the second. The
# per-event pair arrays are stacked and sliced by column instead of being
# walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_lone_test_edges)
test_edge_source_bkg_lone_noBD = _test_pairs[:, :, 0].copy()
test_edge_dest_bkg_lone_noBD = _test_pairs[:, :, 1].copy()

In [46]:
# Unidirectional edge lists for the cluster-lone background test pairs: the
# source is the first cell of every pair and the destination is the second.
# The per-event pair arrays are stacked and sliced by column instead of
# being walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_lone_test_edges)
test_edge_source_bkg_cluster_lone_noBD = _test_pairs[:, :, 0].copy()
test_edge_dest_bkg_cluster_lone_noBD = _test_pairs[:, :, 1].copy()

In [47]:
# Unidirectional edge lists for the cluster-cluster background test pairs:
# the source is the first cell of every pair and the destination is the
# second. The per-event pair arrays are stacked and sliced by column instead
# of being walked element by element; values and order are unchanged.
_test_pairs = np.asarray(bkg_cluster_cluster_test_edges)
test_edge_source_bkg_cluster_cluster_noBD = _test_pairs[:, :, 0].copy()
test_edge_dest_bkg_cluster_cluster_noBD = _test_pairs[:, :, 1].copy()

In [48]:
# Write the true test edge lists, one HDF5 file and one dataset per list.
# Mode 'w' truncates any existing file of the same name.
for out_path, dataset_name, edge_list in (
    ('./test_edge_source_true_BD.hdf5', "test_edge_source_true_BD", test_edge_source_true_BD),
    ('./test_edge_dest_true_BD.hdf5', "test_edge_dest_true_BD", test_edge_dest_true_BD),
    ('./test_edge_source_true_noBD.hdf5', "test_edge_source_true_noBD", test_edge_source_true_noBD),
    ('./test_edge_dest_true_noBD.hdf5', "test_edge_dest_true_noBD", test_edge_dest_true_noBD),
):
    with h5py.File(out_path, 'w') as f:
        dset = f.create_dataset(dataset_name, data=edge_list)

In [49]:
# Write the lone-background test edge lists, one HDF5 file and one dataset
# per list. Mode 'w' truncates any existing file of the same name.
for out_path, dataset_name, edge_list in (
    ('./test_edge_source_bkg_lone_BD.hdf5', "test_edge_source_bkg_lone_BD", test_edge_source_bkg_lone_BD),
    ('./test_edge_dest_bkg_lone_BD.hdf5', "test_edge_dest_bkg_lone_BD", test_edge_dest_bkg_lone_BD),
    ('./test_edge_source_bkg_lone_noBD.hdf5', "test_edge_source_bkg_lone_noBD", test_edge_source_bkg_lone_noBD),
    ('./test_edge_dest_bkg_lone_noBD.hdf5', "test_edge_dest_bkg_lone_noBD", test_edge_dest_bkg_lone_noBD),
):
    with h5py.File(out_path, 'w') as f:
        dset = f.create_dataset(dataset_name, data=edge_list)

In [50]:
# Write the cluster-lone background test edge lists, one HDF5 file and one
# dataset per list. Mode 'w' truncates any existing file of the same name.
for out_path, dataset_name, edge_list in (
    ('./test_edge_source_bkg_cluster_lone_BD.hdf5', "test_edge_source_bkg_cluster_lone_BD", test_edge_source_bkg_cluster_lone_BD),
    ('./test_edge_dest_bkg_cluster_lone_BD.hdf5', "test_edge_dest_bkg_cluster_lone_BD", test_edge_dest_bkg_cluster_lone_BD),
    ('./test_edge_source_bkg_cluster_lone_noBD.hdf5', "test_edge_source_bkg_cluster_lone_noBD", test_edge_source_bkg_cluster_lone_noBD),
    ('./test_edge_dest_bkg_cluster_lone_noBD.hdf5', "test_edge_dest_bkg_cluster_lone_noBD", test_edge_dest_bkg_cluster_lone_noBD),
):
    with h5py.File(out_path, 'w') as f:
        dset = f.create_dataset(dataset_name, data=edge_list)

In [51]:
# Write the cluster-cluster background test edge lists, one HDF5 file and one
# dataset per list. Mode 'w' truncates any existing file of the same name.
for out_path, dataset_name, edge_list in (
    ('./test_edge_source_bkg_cluster_cluster_BD.hdf5', "test_edge_source_bkg_cluster_cluster_BD", test_edge_source_bkg_cluster_cluster_BD),
    ('./test_edge_dest_bkg_cluster_cluster_BD.hdf5', "test_edge_dest_bkg_cluster_cluster_BD", test_edge_dest_bkg_cluster_cluster_BD),
    ('./test_edge_source_bkg_cluster_cluster_noBD.hdf5', "test_edge_source_bkg_cluster_cluster_noBD", test_edge_source_bkg_cluster_cluster_noBD),
    ('./test_edge_dest_bkg_cluster_cluster_noBD.hdf5', "test_edge_dest_bkg_cluster_cluster_noBD", test_edge_dest_bkg_cluster_cluster_noBD),
):
    with h5py.File(out_path, 'w') as f:
        dset = f.create_dataset(dataset_name, data=edge_list)