## Required Packages

In [1]:
#import uproot
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from collections import namedtuple, defaultdict
#import open3d as o3d
import random
random.seed(42)
import h5py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import add_self_loops
from torchvision import transforms
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import joblib
import json

## Reading and arranging data 

In [2]:
# Helpers that load the contents of an HDF5 file into nested Python dicts
def hdf5_to_dict(hdf5_file):
    """
    Return the contents of an open HDF5 file (or group) as a nested dictionary.

    Sub-groups become nested dictionaries; every other member is converted
    to a NumPy array.
    """
    contents = {}
    _hdf5_to_dict(hdf5_file, contents)
    return contents

def _hdf5_to_dict(group, dic):
    """
    Recursively copy the members of `group` into the dictionary `dic` (in place).
    """
    for name, member in group.items():
        if not isinstance(member, h5py.Group):
            # Leaf member: materialise it as a NumPy array.
            dic[name] = np.array(member)
            continue
        # Sub-group: fill its own dictionary first, then attach it under the same key.
        nested = {}
        _hdf5_to_dict(member, nested)
        dic[name] = nested

In [3]:
# cell features of 100 events (100 X 180k)
with h5py.File('./cellFeatures_100evs.hdf5', 'r') as f:
    # Convert HDF5 to dictionary while the file is open;
    # the `with` statement closes the file when this block ends
    data_dict = hdf5_to_dict(f)

In [4]:
# Nothing to do here: the `with` statement in the previous cell already closed `f`,
# so the explicit f.close() that used to live in this cell was redundant.

In [5]:
# Collect the 100 per-event arrays under their names data_0 ... data_99.
# Inserting them by explicit index keeps the dict in numeric event order,
# whatever order the keys of `data_dict` happen to be in.
dynamic_variables = {f"data_{ev}": data_dict[f"data_{ev}"] for ev in range(100)}

In [6]:
# Stack the per-event arrays along axis 0 so the features line up column-wise for scaling
data_100 = np.concatenate(list(dynamic_variables.values()))

In [7]:
data_100.shape

(18765200, 8)

In [8]:
# truth labels for each neighbour edges
# NOTE(review): opened without a `with` block; the handle is read and then closed
# explicitly in the following cells.
hf_neigbor_truth_100evs = h5py.File("./neigbor_truth_100evs.hdf5", 'r')

In [10]:
# `[:]` reads the whole dataset into memory, so it stays usable after the file is closed
neigbor_truth_100evs = hf_neigbor_truth_100evs.get("neigbor_truth_100evs")[:]

In [11]:
# Done reading: release the file handle opened above
hf_neigbor_truth_100evs.close()

In [12]:
neigbor_truth_100evs.shape

(100, 1250242)

## Collecting indices of neighbour edges based on truth (1,9,10,0)

In [13]:
# Every neighbour pair (edge) carries one of four truth codes:
#   1  -> true (same cluster cell pair)
#   9  -> fake/bkg (lone/non participating cell pair)
#   0  -> fake/bkg (cluster-lone cell pair)
#   10 -> fake/bkg (cluster-cluster cell pair)
# For each of the 100 events, store the positions of the edges carrying each code.

true = [list(np.flatnonzero(neigbor_truth_100evs[ev] == 1)) for ev in range(100)]
bkg_lone = [list(np.flatnonzero(neigbor_truth_100evs[ev] == 9)) for ev in range(100)]
bkg_cluster_lone = [list(np.flatnonzero(neigbor_truth_100evs[ev] == 0)) for ev in range(100)]
bkg_cluster_cluster = [list(np.flatnonzero(neigbor_truth_100evs[ev] == 10)) for ev in range(100)]

## Collecting length of each edge (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)

In [14]:
# The per-event index lists are ragged, which is awkward to handle, so record how many
# edges of each kind every event has. The goal is an n x m sample
# (n: events, m: true/bkg samples per event).

true_len = [len(true[ev]) for ev in range(100)]
bkg_lone_len = [len(bkg_lone[ev]) for ev in range(100)]
bkg_cluster_lone_len = [len(bkg_cluster_lone[ev]) for ev in range(100)]
bkg_cluster_cluster_len = [len(bkg_cluster_cluster[ev]) for ev in range(100)]

## Sorting all len arrays in descending order of cluster_cluster bkg sample

In [15]:
# Turn the four per-event count lists into NumPy arrays so they can be fancy-indexed
true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len = (
    np.array(counts)
    for counts in (true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len)
)

In [16]:
# Rank the 100 events by their number of cluster_cluster pairs, largest first
# (cluster_cluster pairs are the rarest kind). Negating the counts turns the
# ascending argsort into a descending one.
descending_key = -bkg_cluster_cluster_len
sorted_cluster_cluster_len_indices = np.argsort(descending_key)

In [17]:
# Put the per-event counts of every edge kind into the cluster_cluster ranking order
true_len_sorted, bkg_lone_len_sorted, bkg_cluster_lone_len_sorted, bkg_cluster_cluster_len_sorted = (
    counts[sorted_cluster_cluster_len_indices]
    for counts in (true_len, bkg_lone_len, bkg_cluster_lone_len, bkg_cluster_cluster_len)
)

In [18]:
# Put the per-event edge-index lists of every kind into the same ranking order
true_sorted, bkg_lone_sorted, bkg_cluster_lone_sorted, bkg_cluster_cluster_sorted = (
    [per_event[ev] for ev in sorted_cluster_cluster_len_indices]
    for per_event in (true, bkg_lone, bkg_cluster_lone, bkg_cluster_cluster)
)

In [83]:
# Split the ranked counts: first 70 events -> train (_70), remaining events -> test (_30)
true_len_sorted_70, true_len_sorted_30 = true_len_sorted[:70], true_len_sorted[70:]
bkg_lone_len_sorted_70, bkg_lone_len_sorted_30 = bkg_lone_len_sorted[:70], bkg_lone_len_sorted[70:]
bkg_cluster_lone_len_sorted_70, bkg_cluster_lone_len_sorted_30 = (
    bkg_cluster_lone_len_sorted[:70],
    bkg_cluster_lone_len_sorted[70:],
)
bkg_cluster_cluster_len_sorted_70, bkg_cluster_cluster_len_sorted_30 = (
    bkg_cluster_cluster_len_sorted[:70],
    bkg_cluster_cluster_len_sorted[70:],
)

In [84]:
# Split the ranked edge-index lists: first 70 events -> train (_70), remaining events -> test (_30)
true_sorted_70, true_sorted_30 = true_sorted[:70], true_sorted[70:]
bkg_lone_sorted_70, bkg_lone_sorted_30 = bkg_lone_sorted[:70], bkg_lone_sorted[70:]
bkg_cluster_lone_sorted_70, bkg_cluster_lone_sorted_30 = (
    bkg_cluster_lone_sorted[:70],
    bkg_cluster_lone_sorted[70:],
)
bkg_cluster_cluster_sorted_70, bkg_cluster_cluster_sorted_30 = (
    bkg_cluster_cluster_sorted[:70],
    bkg_cluster_cluster_sorted[70:],
)


In [21]:
len(bkg_cluster_cluster_sorted_70)

70

### Checking the minimum length of each type of edges (true, bkg_l, bkg_c_l, bkg_c_c)

In [85]:
min(true_len_sorted_70)

65101

In [86]:
min(bkg_lone_len_sorted_70)

906630

In [25]:
min(bkg_cluster_lone_len_sorted_70)

95058

In [26]:
min(bkg_cluster_cluster_len_sorted_70)

9993

### Based on minimum lengths concluding true: 27k, bkg_l: 9k, bkg_c_l: 9k, bkg_c_c: 9k

In [27]:
# Draw a fixed number of edge indices per training event (without replacement):
# 27000 true edges and 9000 of each background kind. The kinds are processed in
# the order true, lone, cluster-lone, cluster-cluster.
true_27k, bkg_lone_9k, bkg_cluster_lone_9k, bkg_cluster_cluster_9k = (
    [random.sample(event_edges, n_draw) for event_edges in per_event]
    for per_event, n_draw in (
        (true_sorted_70, 27000),
        (bkg_lone_sorted_70, 9000),
        (bkg_cluster_lone_sorted_70, 9000),
        (bkg_cluster_cluster_sorted_70, 9000),
    )
)

In [28]:
# Every event now has the same number of samples per kind, so each list of lists
# becomes a regular NumPy array
true_27k, bkg_lone_9k, bkg_cluster_lone_9k, bkg_cluster_cluster_9k = (
    np.array(samples)
    for samples in (true_27k, bkg_lone_9k, bkg_cluster_lone_9k, bkg_cluster_cluster_9k)
)

In [29]:
true_27k.shape

(70, 27000)

In [30]:
bkg_lone_9k.shape

(70, 9000)

In [31]:
bkg_cluster_cluster_9k.shape

(70, 9000)

In [32]:
# Join the three background kinds side by side, per event (lone, cluster-lone, cluster-cluster)
bkg_total = np.concatenate(
    [bkg_lone_9k, bkg_cluster_lone_9k, bkg_cluster_cluster_9k],
    axis=1,
)

In [33]:
bkg_total.shape

(70, 27000)

### Labelling for true/signal and fake/bkg as 1, 0

In [34]:
# One label per sampled edge and event: 0 for fake/bkg, 1 for true/signal
label_shape = (70, 27000)
bkg_truth = np.zeros(label_shape, dtype=int)
true_truth = np.ones(label_shape, dtype=int)

In [35]:
# Labels per event: the true (1) block first, then the background (0) block
total_truth = np.concatenate([true_truth, bkg_truth], axis=1)

In [36]:
total_truth.shape

(70, 54000)

In [37]:
# Edge indices per event: the true block first, then the background block
total_training_indices = np.concatenate([true_27k, bkg_total], axis=1)

In [38]:
total_training_indices.shape

(70, 54000)

In [43]:
# Build one independent permutation of the 54000 sample positions per training event.
# A dedicated, seeded NumPy generator is used because `random.seed(42)` (set with the
# imports) only seeds Python's `random` module, not NumPy; the unseeded
# `np.random.shuffle` call gave a different ordering on every run.
shuffle_rng = np.random.default_rng(42)
rand_index = [shuffle_rng.permutation(54000) for _ in range(70)]

In [44]:
# Stack the per-event permutations into one NumPy array
rand_index = np.array(rand_index)

In [45]:
rand_index[0]

array([53830, 29978, 22871, ..., 41831,  3443, 38019])

In [46]:
# Shuffle each event's edge indices with that event's permutation
total_training_indices_rand = [
    event_indices[perm]
    for event_indices, perm in zip(total_training_indices, rand_index)
]

In [47]:
# Shuffle each event's labels with the same permutation as its edge indices,
# so that labels and samples stay aligned
total_truth_rand = [
    event_labels[perm]
    for event_labels, perm in zip(total_truth, rand_index)
]

In [48]:
# Stack the shuffled per-event index rows into one NumPy array
total_training_indices_rand = np.array(total_training_indices_rand)

In [49]:
# Stack the shuffled per-event label rows into one NumPy array
total_truth_rand = np.array(total_truth_rand)

In [50]:
total_truth_rand[0]

array([0, 0, 1, ..., 0, 1, 0])

In [48]:
# Save the shuffled truth labels of the 70 training events
with h5py.File('./truth_label_train_70evs.hdf5', 'w') as label_file:
    label_file.create_dataset("truth_label_train", data=total_truth_rand)

In [51]:
total_training_indices_rand.shape

(70, 54000)

In [53]:
# Load the table of unique neighbour pairs. The `with` block guarantees the file is
# closed even if the read fails, and indexing by name raises a clear KeyError when
# the dataset is missing (`.get(...)` would return None and fail on the slice).
with h5py.File("./neighbor_pairs_unique_sorted.hdf5", 'r') as hf_neighbor_pairs_unique_sorted:
    # `[:]` copies the dataset into memory so it remains usable after the file is closed
    neighbor_pairs_unique_sorted = hf_neighbor_pairs_unique_sorted["neighbor_pair"][:]

In [54]:
# Look up the cell pair behind every sampled training edge index, event by event
total_training_edges_rand = [
    neighbor_pairs_unique_sorted[event_indices]
    for event_indices in total_training_indices_rand
]

In [55]:
# Stack the per-event pair tables into one NumPy array
total_training_edges_rand = np.array(total_training_edges_rand)

In [56]:
total_training_edges_rand.shape

(70, 54000, 2)

In [57]:
# Bidirectional source list: each pair (a, b) contributes a and then b.
# Flattening the two pair columns row by row gives exactly that interleaving.
train_edge_source_BD = [
    list(event_pairs[:, :2].reshape(-1))
    for event_pairs in total_training_edges_rand
]

In [58]:
# Convert the per-event lists into one NumPy array
train_edge_source_BD = np.array(train_edge_source_BD)

In [59]:
train_edge_source_BD.shape

(70, 108000)

In [60]:
# Bidirectional destination list: each pair (a, b) contributes b and then a.
# Swapping the two pair columns before flattening gives exactly that interleaving.
train_edge_dest_BD = [
    list(event_pairs[:, [1, 0]].reshape(-1))
    for event_pairs in total_training_edges_rand
]

In [61]:
# Convert the per-event lists into one NumPy array
train_edge_dest_BD = np.array(train_edge_dest_BD)

In [62]:
train_edge_dest_BD.shape

(70, 108000)

In [63]:
# One-directional edge lists: the first cell of every pair is the source,
# the second cell is the destination
train_edge_source_noBD = [
    list(event_pairs[:, 0]) for event_pairs in total_training_edges_rand
]

train_edge_dest_noBD = [
    list(event_pairs[:, 1]) for event_pairs in total_training_edges_rand
]

In [64]:
# Convert the per-event source and destination lists into NumPy arrays
train_edge_source_noBD = np.array(train_edge_source_noBD)
train_edge_dest_noBD = np.array(train_edge_dest_noBD)

In [65]:
train_edge_source_noBD.shape

(70, 54000)

In [66]:
train_edge_dest_noBD.shape

(70, 54000)

In [64]:
# Persist the training edge lists, one HDF5 file (with a single dataset) per array
for out_path, dset_name, dset_values in (
    ('./train_edge_source_BD_70evs.hdf5', "train_edge_source_BD", train_edge_source_BD),
    ('./train_edge_dest_BD_70evs.hdf5', "train_edge_dest_BD", train_edge_dest_BD),
    ('./train_edge_source_noBD_70evs.hdf5', "train_edge_source_noBD", train_edge_source_noBD),
    ('./train_edge_dest_noBD_70evs.hdf5', "train_edge_dest_noBD", train_edge_dest_noBD),
):
    with h5py.File(out_path, 'w') as out_file:
        out_file.create_dataset(dset_name, data=dset_values)

# Scaling of features

In [67]:
# Event names and their feature arrays, both in the insertion order of `dynamic_variables`
keys = list(dynamic_variables)
values = [dynamic_variables[name] for name in keys]

In [68]:
# Reorder the per-event feature arrays into the cluster_cluster ranking order
rearranged_values = []
for ev in sorted_cluster_cluster_len_indices:
    rearranged_values.append(values[ev])

In [69]:
# Pair every feature array with the name of the event it actually came from.
# The arrays are taken in the cluster_cluster ranking order, so the names must be
# reordered the same way; zipping the original-order `keys` with the reordered
# values attached the wrong event name to the arrays.
# Insertion order (and therefore any order-based slicing) is unchanged.
rearranged_dict = {keys[ev]: values[ev] for ev in sorted_cluster_cluster_len_indices}

In [70]:
# Training features: the first 70 events of the ranked dict, stacked along axis 0
data_70 = np.concatenate(list(rearranged_dict.values())[:70])

In [71]:
# Test features: the remaining events of the *ranked* dict, stacked along axis 0.
# The test edges and labels are built from the events ranked 70 and beyond
# (`*_sorted_30`), so the features must come from `rearranged_dict` as well.
# Slicing the unranked `dynamic_variables` picked events data_70..data_99 instead,
# which do not match the test edges and can overlap the training events.
data_30 = np.concatenate(list(rearranged_dict.values())[70:])

In [72]:
# Fit the min-max scaler on the training features only, scale them,
# and save the fitted scaler to disk for later reuse
scaler_filename = "./scaler_neighbor_data_70_sorted.save"
scaler = MinMaxScaler()
cellFeatures_trainS = scaler.fit_transform(data_70)
joblib.dump(scaler, scaler_filename)

['./scaler_neighbor_data_70_sorted.save']

In [73]:
cellFeatures_trainS[1]

array([0.58421445, 0.51289224, 0.16380877, 0.23466876, 0.52418755,
       0.26086957, 0.09700815, 0.23840893])

In [74]:
# Restore the per-event layout: (events, cells per event, features)
train_feature_shape = (70, 187652, 8)
cellFeatures_trainS_reshape = cellFeatures_trainS.reshape(train_feature_shape)

In [75]:
# Reload the scaler saved after fitting on the training features and apply it,
# without refitting, to the test features
scaler = joblib.load('./scaler_neighbor_data_70_sorted.save') 
cellFeatures_testS = scaler.transform(data_30)

In [76]:
# Restore the per-event layout: (events, cells per event, features)
test_feature_shape = (30, 187652, 8)
cellFeatures_testS_reshape = cellFeatures_testS.reshape(test_feature_shape)

In [76]:
# Write the scaled training cell features to their own HDF5 file
with h5py.File('./cellFeaturesScaled_train_70evs.hdf5', 'w') as train_file:
    train_file.create_dataset("cellFeatures_trainS", data=cellFeatures_trainS_reshape)

In [77]:
# Write the scaled test cell features to their own HDF5 file
with h5py.File('./cellFeaturesScaled_test_30evs.hdf5', 'w') as test_file:
    test_file.create_dataset("cellFeatures_testS", data=cellFeatures_testS_reshape)

### for test sample

In [77]:
# Per-event edge counts of the test events (everything after the first 70 ranked events).
# These are the same slices already taken in the train/test length split earlier on.
true_len_sorted_30, bkg_lone_len_sorted_30, bkg_cluster_lone_len_sorted_30, bkg_cluster_cluster_len_sorted_30 = (
    counts[70:]
    for counts in (
        true_len_sorted,
        bkg_lone_len_sorted,
        bkg_cluster_lone_len_sorted,
        bkg_cluster_cluster_len_sorted,
    )
)

In [78]:
min(true_len_sorted_30)

45600

In [79]:
min(bkg_lone_len_sorted_30)

1006867

In [80]:
min(bkg_cluster_lone_len_sorted_30)

87343

In [81]:
min(bkg_cluster_cluster_len_sorted_30)

3334

In [87]:
# Draw a fixed number of edge indices per test event (without replacement):
# 45000 true, 87000 lone, 87000 cluster-lone and 3000 cluster-cluster edges.
# The kinds are processed in that order.
true_45k, bkg_lone_87k, bkg_cluster_lone_87k, bkg_cluster_cluster_3k = (
    [random.sample(event_edges, n_draw) for event_edges in per_event]
    for per_event, n_draw in (
        (true_sorted_30, 45000),
        (bkg_lone_sorted_30, 87000),
        (bkg_cluster_lone_sorted_30, 87000),
        (bkg_cluster_cluster_sorted_30, 3000),
    )
)

In [89]:
# Look up the cell pair behind every sampled test edge index, kind by kind
# and event by event (30 test events)
true_test_edges = [neighbor_pairs_unique_sorted[true_45k[ev]] for ev in range(30)]
bkg_lone_test_edges = [neighbor_pairs_unique_sorted[bkg_lone_87k[ev]] for ev in range(30)]
bkg_cluster_lone_test_edges = [
    neighbor_pairs_unique_sorted[bkg_cluster_lone_87k[ev]] for ev in range(30)
]
bkg_cluster_cluster_test_edges = [
    neighbor_pairs_unique_sorted[bkg_cluster_cluster_3k[ev]] for ev in range(30)
]

In [90]:
# Stack the per-event pair tables of the true test edges into one NumPy array
true_test_edges = np.array(true_test_edges)

In [91]:
true_test_edges.shape

(30, 45000, 2)

In [92]:
# Bidirectional source list (true test edges): each pair (a, b) contributes a and then b
test_edge_source_true_BD = [
    list(event_pairs[:, :2].reshape(-1)) for event_pairs in true_test_edges
]

In [93]:
# Bidirectional destination list (true test edges): each pair (a, b) contributes b and then a
test_edge_dest_true_BD = [
    list(event_pairs[:, [1, 0]].reshape(-1)) for event_pairs in true_test_edges
]

In [94]:
# Bidirectional source list (lone bkg test edges): each pair (a, b) contributes a and then b
test_edge_source_bkg_lone_BD = [
    list(event_pairs[:, :2].reshape(-1)) for event_pairs in bkg_lone_test_edges
]

In [95]:
# Bidirectional destination list (lone bkg test edges): each pair (a, b) contributes b and then a
test_edge_dest_bkg_lone_BD = [
    list(event_pairs[:, [1, 0]].reshape(-1)) for event_pairs in bkg_lone_test_edges
]

In [96]:
# Bidirectional source list (cluster-lone bkg test edges): each pair (a, b) contributes a and then b
test_edge_source_bkg_cluster_lone_BD = [
    list(event_pairs[:, :2].reshape(-1)) for event_pairs in bkg_cluster_lone_test_edges
]

In [97]:
# Bidirectional destination list (cluster-lone bkg test edges): each pair (a, b) contributes b and then a
test_edge_dest_bkg_cluster_lone_BD = [
    list(event_pairs[:, [1, 0]].reshape(-1)) for event_pairs in bkg_cluster_lone_test_edges
]

In [98]:
# Bidirectional source list (cluster-cluster bkg test edges): each pair (a, b) contributes a and then b
test_edge_source_bkg_cluster_cluster_BD = [
    list(event_pairs[:, :2].reshape(-1)) for event_pairs in bkg_cluster_cluster_test_edges
]

In [99]:
# Bidirectional destination list (cluster-cluster bkg test edges): each pair (a, b) contributes b and then a
test_edge_dest_bkg_cluster_cluster_BD = [
    list(event_pairs[:, [1, 0]].reshape(-1)) for event_pairs in bkg_cluster_cluster_test_edges
]

In [100]:
# One-directional lists (true test edges): first cell of each pair is the source,
# second cell is the destination
test_edge_source_true_noBD = [list(event_pairs[:, 0]) for event_pairs in true_test_edges]

test_edge_dest_true_noBD = [list(event_pairs[:, 1]) for event_pairs in true_test_edges]

In [101]:
# One-directional lists (lone bkg test edges): first cell of each pair is the source,
# second cell is the destination
test_edge_source_bkg_lone_noBD = [list(event_pairs[:, 0]) for event_pairs in bkg_lone_test_edges]

test_edge_dest_bkg_lone_noBD = [list(event_pairs[:, 1]) for event_pairs in bkg_lone_test_edges]

In [102]:
# One-directional lists (cluster-lone bkg test edges): first cell of each pair is the source,
# second cell is the destination
test_edge_source_bkg_cluster_lone_noBD = [
    list(event_pairs[:, 0]) for event_pairs in bkg_cluster_lone_test_edges
]

test_edge_dest_bkg_cluster_lone_noBD = [
    list(event_pairs[:, 1]) for event_pairs in bkg_cluster_lone_test_edges
]

In [103]:
# One-directional lists (cluster-cluster bkg test edges): first cell of each pair is the source,
# second cell is the destination
test_edge_source_bkg_cluster_cluster_noBD = [
    list(event_pairs[:, 0]) for event_pairs in bkg_cluster_cluster_test_edges
]

test_edge_dest_bkg_cluster_cluster_noBD = [
    list(event_pairs[:, 1]) for event_pairs in bkg_cluster_cluster_test_edges
]

In [48]:
# Persist the true test edge lists, one HDF5 file (with a single dataset) per list
for out_path, dset_name, dset_values in (
    ('./test_edge_source_true_BD.hdf5', "test_edge_source_true_BD", test_edge_source_true_BD),
    ('./test_edge_dest_true_BD.hdf5', "test_edge_dest_true_BD", test_edge_dest_true_BD),
    ('./test_edge_source_true_noBD.hdf5', "test_edge_source_true_noBD", test_edge_source_true_noBD),
    ('./test_edge_dest_true_noBD.hdf5', "test_edge_dest_true_noBD", test_edge_dest_true_noBD),
):
    with h5py.File(out_path, 'w') as out_file:
        out_file.create_dataset(dset_name, data=dset_values)

In [49]:
# Persist the lone bkg test edge lists, one HDF5 file (with a single dataset) per list
for out_path, dset_name, dset_values in (
    ('./test_edge_source_bkg_lone_BD.hdf5', "test_edge_source_bkg_lone_BD", test_edge_source_bkg_lone_BD),
    ('./test_edge_dest_bkg_lone_BD.hdf5', "test_edge_dest_bkg_lone_BD", test_edge_dest_bkg_lone_BD),
    ('./test_edge_source_bkg_lone_noBD.hdf5', "test_edge_source_bkg_lone_noBD", test_edge_source_bkg_lone_noBD),
    ('./test_edge_dest_bkg_lone_noBD.hdf5', "test_edge_dest_bkg_lone_noBD", test_edge_dest_bkg_lone_noBD),
):
    with h5py.File(out_path, 'w') as out_file:
        out_file.create_dataset(dset_name, data=dset_values)

In [50]:
# Persist the cluster-lone bkg test edge lists, one HDF5 file (with a single dataset) per list
for out_path, dset_name, dset_values in (
    ('./test_edge_source_bkg_cluster_lone_BD.hdf5', "test_edge_source_bkg_cluster_lone_BD", test_edge_source_bkg_cluster_lone_BD),
    ('./test_edge_dest_bkg_cluster_lone_BD.hdf5', "test_edge_dest_bkg_cluster_lone_BD", test_edge_dest_bkg_cluster_lone_BD),
    ('./test_edge_source_bkg_cluster_lone_noBD.hdf5', "test_edge_source_bkg_cluster_lone_noBD", test_edge_source_bkg_cluster_lone_noBD),
    ('./test_edge_dest_bkg_cluster_lone_noBD.hdf5', "test_edge_dest_bkg_cluster_lone_noBD", test_edge_dest_bkg_cluster_lone_noBD),
):
    with h5py.File(out_path, 'w') as out_file:
        out_file.create_dataset(dset_name, data=dset_values)

In [51]:
# Persist the cluster-cluster bkg test edge lists, one HDF5 file (with a single dataset) per list
for out_path, dset_name, dset_values in (
    ('./test_edge_source_bkg_cluster_cluster_BD.hdf5', "test_edge_source_bkg_cluster_cluster_BD", test_edge_source_bkg_cluster_cluster_BD),
    ('./test_edge_dest_bkg_cluster_cluster_BD.hdf5', "test_edge_dest_bkg_cluster_cluster_BD", test_edge_dest_bkg_cluster_cluster_BD),
    ('./test_edge_source_bkg_cluster_cluster_noBD.hdf5', "test_edge_source_bkg_cluster_cluster_noBD", test_edge_source_bkg_cluster_cluster_noBD),
    ('./test_edge_dest_bkg_cluster_cluster_noBD.hdf5', "test_edge_dest_bkg_cluster_cluster_noBD", test_edge_dest_bkg_cluster_cluster_noBD),
):
    with h5py.File(out_path, 'w') as out_file:
        out_file.create_dataset(dset_name, data=dset_values)