## Build the Data object

In [None]:
import numpy as np
import pandas as pd 
import random as rd
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch_geometric.data as data
from torch_geometric.nn import GCNConv
import torch_geometric.transforms as T
import torch.nn.functional as F
from torch_geometric.utils import negative_sampling,train_test_split_edges,to_dense_adj
from torch_geometric.loader import DataLoader
from sklearn.metrics import roc_auc_score
from torch_geometric.transforms import RandomLinkSplit
from sklearn import preprocessing
from torch_geometric.nn import Node2Vec

# Torch device string; the Node2Vec model and its random-walk batches are
# moved to this device via `.to(device)` further down.
device = "cpu"

In [None]:
# Load one node table and one edge table per school, re-base the node ids so
# that consecutive schools occupy consecutive id ranges, binarise the edge
# weights, and wrap every school's edge list in a PyG `Data` object.
len_datasets = 13  # CSV files are numbered 1..len_datasets on disk
datasets_nodes = []
datasets_edges = []
data_list = []

for school in range(len_datasets):
    nodes = pd.read_csv(f"Coles/Nodes/Nodes_t{school + 1}.csv",
                        sep=",", encoding='unicode_escape')
    edges = pd.read_csv(f"Coles/Edges/Edges_t{school + 1}.csv",
                        sep=",", encoding='unicode_escape')
    nodes.drop("ID", axis=1, inplace=True)

    # Constant added to both endpoint columns. The first school is shifted so
    # its smallest "from" id becomes 0; every later school starts right after
    # the largest (already shifted) "from" id of the previous school.
    # NOTE(review): only the "from" column drives the offset; ids that occur
    # solely in "to" are not considered - confirm this matches the data.
    lowest_from = edges["from"].min()
    if school == 0:
        shift = -lowest_from
    else:
        shift = 1 + datasets_edges[school - 1]["from"].max() - lowest_from
    edges[["from", "to"]] = edges[["from", "to"]] + shift

    # Tag every row with the index of the school it came from.
    nodes["Escuela"] = school
    edges["Escuela"] = school

    # Keep only the sign of the weight and fold -1 into 0.
    edges["weight"] = np.sign(edges["weight"]).replace({-1: 0})

    datasets_nodes.append(nodes)
    datasets_edges.append(edges)
    # edge_index is the transposed ("from", "to") table, i.e. one row per endpoint.
    endpoints = edges[["from", "to"]].to_numpy().T
    data_list.append(data.Data(edge_index=torch.tensor(endpoints)))

# A single batch holds every school graph.
data_loader = DataLoader(data_list, batch_size=len_datasets)

In [None]:
def _train_one_epoch(model, loader, optimizer):
    """Run one optimisation pass over the random-walk batches.

    Args:
        model: Model exposing ``train()`` and ``loss(pos_rw, neg_rw)``.
        loader: Iterable yielding ``(pos_rw, neg_rw)`` batches, as produced
            by ``model.loader(...)``.
        optimizer: Optimizer built over ``model.parameters()``.

    Returns:
        The summed batch loss divided by ``len(loader)``.
    """
    model.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)


# Train a separate Node2Vec model per school graph, turn the node embeddings
# into edge features (element-wise maximum of the two endpoint embeddings) and
# collect them, together with the school index and the binarised weight, into
# one table.
school_frames = []

# The loop variable is intentionally NOT named `data`: that name is the
# `torch_geometric.data` module alias used by the data-building cell, and
# rebinding it here would break `data.Data(...)` when that cell is re-run.
for school_idx, graph in enumerate(data_loader.dataset):
    print("Dataset: {}".format(school_idx))
    model = Node2Vec(graph.edge_index, embedding_dim=128, walk_length=40,
                     context_size=20, walks_per_node=5,
                     num_negative_samples=1, p=1, q=1, sparse=True).to(device)
    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    # sparse=True above yields sparse gradients, hence SparseAdam.
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

    for epoch in range(1, 121):
        loss = _train_one_epoch(model, loader, optimizer)
        if epoch % 10 == 0:
            print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

    # Calling the model without arguments is used here to obtain the
    # embedding matrix, which is then indexed by node id.
    z = model()
    emb_128 = z.detach().cpu().numpy()

    # Gather both endpoint embeddings for all edges at once instead of
    # indexing edge by edge. The rows are handed to pandas as a list of 1-D
    # arrays so dtype inference matches the previous row-by-row construction.
    src, dst = graph.edge_index.cpu().numpy()
    edge_embedding = list(np.maximum(emb_128[src], emb_128[dst]))

    total_embeddings_temp = pd.DataFrame(edge_embedding)
    school_edges = datasets_edges[school_idx]
    total_embeddings_temp["Escuela"] = school_edges["Escuela"]
    total_embeddings_temp["weight"] = school_edges["weight"]
    school_frames.append(total_embeddings_temp)

# Concatenate once at the end; concatenating inside the loop re-copies every
# previously accumulated row on each iteration.
if school_frames:
    total_embeddings = pd.concat(school_frames, axis=0)
else:
    total_embeddings = pd.DataFrame()

# NOTE(review): this call used to pass index="False". A non-empty string is
# truthy, so the per-school row index has always been written to the file.
# That behaviour is kept (now explicit) so existing readers of the CSV keep
# working; switch to index=False if the extra column is not wanted.
total_embeddings.to_csv("total_embeddings_with_bf_dataloader.csv", index=True)

### Build the dataloader object