# Loading HTGN Data and Exporting It as Edge-List CSV

In [14]:
from os.path import join 
import numpy as np 
import pandas as pd
import pickle
from torch_geometric.utils.convert import from_scipy_sparse_matrix
import torch

## UTILS

In [2]:
def save_as_csv(u,i,t,w,save_path):
    """Write four parallel edge sequences to ``save_path`` as a CSV file.

    The inputs become the columns ``u``, ``i``, ``ts`` and ``label`` (in that
    order), one row per edge; the DataFrame index is not written.

    Args:
        u: First endpoint of each edge.
        i: Second endpoint of each edge.
        t: Timestamp of each edge (stored in the ``ts`` column).
        w: Weight of each edge (stored in the ``label`` column).
        save_path: Destination of the CSV file. When it is a filesystem path,
            missing parent directories are created first.
    """
    import os  # local import: the notebook's import cell only imports os.path.join

    # DataFrame.to_csv does not create missing directories, so a fresh
    # destination folder would otherwise make the export fail.
    # Only path-like targets are handled; buffers are passed through untouched.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df = pd.DataFrame({'u':u,'i':i,'ts':t,'label':w})
    df.to_csv(save_path,index=False)


def convert_to_dgb(datadir_src, dg_file):
    """Flatten a pickled sequence of graph snapshots into parallel edge arrays.

    The file ``join(datadir_src, dg_file)`` is unpickled, every snapshot is
    passed through ``from_scipy_sparse_matrix`` and the edges of all snapshots
    are concatenated. Each edge is stamped with the index of the snapshot it
    came from. The number of snapshots, links and nodes is printed.

    NOTE(review): presumably every snapshot is a SciPy sparse adjacency
    matrix with 0-based node ids — confirm against the raw data.

    Args:
        datadir_src: Directory that contains the pickle file.
        dg_file: Name of the pickle file inside ``datadir_src``.

    Returns:
        Tuple ``(all_u, all_i, all_t, all_w)`` of 1-D NumPy arrays of equal
        length: first edge endpoints, second edge endpoints, snapshot index of
        each edge (as float) and the edge weights returned by
        ``from_scipy_sparse_matrix``.

    Raises:
        ValueError: If the pickle contains no snapshots.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only use this on files from a trusted source.
    with open(join(datadir_src, dg_file), 'rb') as f:
        dg = pickle.load(f, encoding='latin1')

    n_snapshots = len(dg)
    if n_snapshots == 0:
        raise ValueError('no snapshots found in ' + join(datadir_src, dg_file))

    all_u = []
    all_i = []
    all_t = []
    all_w = []
    # Index-based access is kept so any int-indexable container behaves as before.
    for snap_idx in range(n_snapshots):
        ei, w = from_scipy_sparse_matrix(dg[snap_idx])
        u = ei[0].numpy()
        v = ei[1].numpy()
        all_u.append(u)
        all_i.append(v)
        # One float timestamp per edge: the index of the current snapshot.
        all_t.append(np.ones(len(u))*snap_idx)
        all_w.append(w.numpy())
    all_u = np.concatenate(all_u)
    all_i = np.concatenate(all_i)
    all_t = np.concatenate(all_t)
    all_w = np.concatenate(all_w)

    count_link = len(all_u)
    # Largest node id + 1; an edgeless graph reports 0 instead of crashing
    # on the maximum of an empty array.
    n_nodes = max(all_u.max(), all_i.max()) + 1 if count_link else 0

    print('Number of snapshots: ',n_snapshots)
    print('Number of links: ',count_link)
    print('Number of nodes: ',n_nodes )
    return all_u, all_i, all_t, all_w

## Colab

### Dest 

In [3]:
# Destination layout: <datadir_dst>/<name>/ml_<name>.csv
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
name = 'Colab'
datadir_dst = '/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets'
dataset_dst = join(datadir_dst, name)
file_save = join(dataset_dst, 'ml_' + name + '.csv')

In [4]:
# Display the destination CSV path as a sanity check.
file_save

'/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets/Colab/ml_Colab.csv'

### Source 

In [5]:
# Source directory and pickle file name that are handed to convert_to_dgb.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/raw/dblp"
dg_file = "adj_time_list.pickle"


In [6]:

# Flatten the pickled snapshots into four parallel edge arrays
# (endpoints u / i, snapshot index t, weight w); also prints summary counts.
all_u, all_i, all_t, all_w = convert_to_dgb(datadir_src, dg_file)

Number of snapshots:  10
Number of links:  5104
Number of nodes:  315


  dg = pickle.load(f, encoding='latin1')


In [7]:
# Write the edge arrays to file_save (CSV columns: u, i, ts, label).
save_as_csv(all_u,all_i,all_t,all_w,file_save)

In [8]:
# Display the path the CSV was written to.
file_save

'/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets/Colab/ml_Colab.csv'

## Enron

### Dest 

In [48]:
# Destination layout: <datadir_dst>/<name>/ml_<name>.csv
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
name = 'Enron'
datadir_dst = '/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets'
dataset_dst = join(datadir_dst, name)
file_save = join(dataset_dst, 'ml_' + name + '.csv')

### Source 

In [49]:
# Source directory and pickle file name that are handed to convert_to_dgb.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/raw/enron10"
dg_file = "adj_time_list.pickle"

In [50]:
# Flatten the pickled snapshots into four parallel edge arrays
# (endpoints u / i, snapshot index t, weight w); also prints summary counts.
all_u, all_i, all_t, all_w = convert_to_dgb(datadir_src, dg_file)

Number of snapshots:  11
Number of links:  4784
Number of nodes:  184


  dg = pickle.load(f, encoding='latin1')


In [51]:
# Write the edge arrays to file_save (CSV columns: u, i, ts, label).
save_as_csv(all_u,all_i,all_t,all_w,file_save)

In [52]:
# Display the path the CSV was written to.
file_save

'/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets/Enron/ml_Enron.csv'

## AS733

### Dest 

In [12]:
# Destination layout: <datadir_dst>/<name>/ml_<name>.csv
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
name = 'AS733'
datadir_dst = '/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets'
dataset_dst = join(datadir_dst, name)
file_save = join(dataset_dst, 'ml_' + name + '.csv')

### Source

In [13]:
# Source directory and file name of the serialized snapshots read with torch.load.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/processed/as733"
dg_file = "as733.pt"

In [15]:
# Load the serialized snapshots into `dg`.
# NOTE(security): torch.load relies on unpickling — only load trusted files.
dg = torch.load(join(datadir_src, dg_file))

In [28]:
# Flatten every snapshot in `dg` into four parallel edge arrays (the inputs of
# `save_as_csv`): endpoints u / i, the snapshot index as timestamp, unit weight.
# NOTE(review): assumes np.array(dg[k]) has one row per edge with the two
# endpoints in columns 0 and 1 — confirm against the loaded .pt file.
all_u = []
all_i = []
all_t = []
all_w = []
for i in range(len(dg)):
    # Convert and transpose once per snapshot (previously done twice).
    endpoints = np.array(dg[i]).T
    u = endpoints[0]
    v = endpoints[1]
    all_u.append(u)
    all_i.append(v)
    all_t.append(np.ones(len(u))*i)  # float timestamp = snapshot index
    all_w.append(np.ones(len(u)))    # every edge gets weight 1

all_u = np.concatenate(all_u)
all_i = np.concatenate(all_i)
all_t = np.concatenate(all_t)
all_w = np.concatenate(all_w)
count_link = len(all_u)
# Count snapshots from the container itself rather than the leaked loop index.
print('Number of snapshots: ',len(dg))
print('Number of links: ',count_link)
# Largest node id + 1 (assumes 0-based ids); array max avoids sorting via np.unique.
print('Number of nodes: ',max(all_u.max(), all_i.max()) +1 )

Number of snapshots:  30
Number of links:  163807
Number of nodes:  6628


In [29]:
# Write the edge arrays to file_save (CSV columns: u, i, ts, label).
save_as_csv(all_u,all_i,all_t,all_w,file_save)

In [30]:
# Display the path the CSV was written to.
file_save

'/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets/AS733/ml_AS733.csv'

## HepPh

### Dest 

In [31]:
# Destination layout: <datadir_dst>/<name>/ml_<name>.csv
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
name = 'HepPh'
datadir_dst = '/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets'
dataset_dst = join(datadir_dst, name)
file_save = join(dataset_dst, 'ml_' + name + '.csv')

### Source

In [32]:
# Source directory and file name of the serialized snapshots read with torch.load.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/processed/HepPh30"

dg_file = "HepPh30.pt"

In [44]:
# Load the serialized snapshots into `dg`.
# NOTE(security): torch.load relies on unpickling — only load trusted files.
dg = torch.load(join(datadir_src, dg_file))

In [45]:
# Flatten every snapshot in `dg` into four parallel edge arrays (the inputs of
# `save_as_csv`): endpoints u / i, the snapshot index as timestamp, unit weight.
# NOTE(review): assumes np.array(dg[k]) has one row per edge with the two
# endpoints in columns 0 and 1 — confirm against the loaded .pt file.
all_u = []
all_i = []
all_t = []
all_w = []
for i in range(len(dg)):
    # Convert and transpose once per snapshot (previously done twice).
    endpoints = np.array(dg[i]).T
    u = endpoints[0]
    v = endpoints[1]
    all_u.append(u)
    all_i.append(v)
    all_t.append(np.ones(len(u))*i)  # float timestamp = snapshot index
    all_w.append(np.ones(len(u)))    # every edge gets weight 1

all_u = np.concatenate(all_u)
all_i = np.concatenate(all_i)
all_t = np.concatenate(all_t)
all_w = np.concatenate(all_w)
count_link = len(all_u)
# Count snapshots from the container itself rather than the leaked loop index.
print('Number of snapshots: ',len(dg))
print('Number of links: ',count_link)
# Largest node id + 1 (assumes 0-based ids); array max avoids sorting via np.unique.
print('Number of nodes: ',max(all_u.max(), all_i.max()) +1 )

Number of snapshots:  36
Number of links:  1631581
Number of nodes:  15330


In [46]:
# Write the edge arrays to file_save (CSV columns: u, i, ts, label).
save_as_csv(all_u,all_i,all_t,all_w,file_save)

In [47]:
# Display the path the CSV was written to.
file_save

'/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets/HepPh/ml_HepPh.csv'

## DISEASE

### Dest 

In [54]:
# Destination layout: <datadir_dst>/<name>/ml_<name>.csv
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
name = 'DISEASE'
datadir_dst = '/home/karmim/Documents/These/DynamicGraphTransformer/TG_Network_datasets'
dataset_dst = join(datadir_dst, name)
file_save = join(dataset_dst, 'ml_' + name + '.csv')

In [57]:
# Source directory and CSV file name that are read with pandas.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/raw/disease"
dg_file = "disease.csv"

In [58]:
# Read the comma-separated file; header=None means the first line is treated
# as data and pandas numbers the columns itself.
df =  pd.read_csv(join(datadir_src, dg_file), sep=',', header=None)

In [59]:
# Preview the loaded table.
df

Unnamed: 0,0,1
0,0,1
1,0,2
2,0,3
3,0,4
4,0,5
...,...,...
2659,427,2660
2660,427,2661
2661,427,2662
2662,427,2663


In [67]:
import scipy.sparse as sp

In [63]:
# Point dg_file at the .npz file of the same directory, inspected in the cells that follow.
datadir_src = "/home/karmim/Documents/These/DynamicGraphTransformer/data/input/raw/disease"
dg_file = "disease_lp.feats.npz"

In [69]:
# Load the .npz as a SciPy sparse matrix and show the shape of its dense form.
# Requires the `import scipy.sparse as sp` cell to have been run.
sp.load_npz(join(datadir_src,dg_file)).toarray().shape

(2665, 11)

In [64]:
# Open the same file as a raw NumPy archive to inspect its stored keys.
# Note: this rebinds `dg`, which earlier cells use for the snapshot list.
dg = np.load(join(datadir_src, dg_file))

In [65]:
# Display the archive summary (file path and keys).
dg

NpzFile '/home/karmim/Documents/These/DynamicGraphTransformer/data/input/raw/disease/disease_lp.feats.npz' with keys: indices, indptr, format, shape, data

In [70]:
# Switch to the edge-list CSV of the same directory and read it without a
# header row (header=None), so pandas numbers the columns itself.
dg_file = "disease_lp.edges.csv"

df = pd.read_csv(join(datadir_src, dg_file), sep=',', header=None)

In [72]:
# Show the edge table as a raw NumPy array.
df.values

array([[   0,    1],
       [   0,    2],
       [   0,    3],
       ...,
       [ 427, 2662],
       [ 427, 2663],
       [ 427, 2664]])