In [9]:
!pip install torch torch-geometric

Collecting torch-geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting aiohttp (from torch-geometric)
  Using cached aiohttp-3.11.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting tqdm (from torch-geometric)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch-geometric)
  Using cached aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch-geometric)
  Using cached aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch-geometric)
  Using cached frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->torch-geometric)
  Using cached multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Col

In [20]:
import torch
from torch_geometric.nn import GCNConv
from torch.nn import Linear
from torch.nn.functional import relu, sigmoid, binary_cross_entropy
import numpy as np
import json

In [59]:
class GNN(torch.nn.Module):
    def __init__(self,  l1, l2, l3):
        super(GNN, self).__init__()
        self.conv1 = GCNConv(l1, l2)
        self.conv2 = GCNConv(l2, l3)

    
    def forward(self, x: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x, edge_index)
        x = relu(x)
        x = self.conv2(x, edge_index)
        return x

class EdgesMLP(torch.nn.Module):
    def __init__(self, l3):
        super(EdgesMLP, self).__init__()
        self.linear = Linear(2*l3, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.linear(x)
        return sigmoid(x)

In [60]:
import json
with open("../dataset.json", "r") as f:
    dataset = json.load(f)['dataset']


print("DATASET INFO:")
print("count row:", len(dataset))
print("first:", dataset[0].keys())
print(f"\t A:", np.shape(dataset[0]["A"]))
print(f"\t nodes_feature:", np.shape(dataset[0]["nodes_feature"]))
print(f"\t edges_feature:", np.shape(dataset[0]["edges_feature"]))
print(f"\t true_edges:", np.shape(dataset[0]["true_edges"]))
print("end:", dataset[-1].keys())
print(f"\t A:", np.shape(dataset[-1]["A"]))
print(f"\t nodes_feature:", np.shape(dataset[-1]["nodes_feature"]))
print(f"\t edges_feature:", np.shape(dataset[-1]["edges_feature"]))
print(f"\t true_edges:", np.shape(dataset[-1]["true_edges"]))


DATASET INFO:
count row: 1557
first: dict_keys(['A', 'nodes_feature', 'edges_feature', 'true_edges'])
	 A: (2, 779)
	 nodes_feature: (385, 9)
	 edges_feature: (779,)
	 true_edges: (779,)
end: dict_keys(['A', 'nodes_feature', 'edges_feature', 'true_edges'])
	 A: (2, 2142)
	 nodes_feature: (1039, 9)
	 edges_feature: (2142,)
	 true_edges: (2142,)


In [63]:
def rev_dist(a):
    if a==0:
        return 0
    else:
        return 1/a
        
i = dataset[0]["A"]
v_in = [rev_dist(e) for e in dataset[0]["edges_feature"]]
v_true = dataset[0]["true_edges"]
x = dataset[0]["nodes_feature"]
N = len(x)

X = torch.Tensor(x)
sp_A = torch.sparse_coo_tensor(i, v_in, (N, N))
E_true = torch.Tensor([v_true]).T

In [76]:
params = {
    "count_neuron_layer_1": 9,
    "count_neuron_layer_2": 27,
    "count_neuron_layer_end": 18,
    "epochs": 30,
    "batch_size": 100,
}

learning_rate = 0.05

l1 = params["count_neuron_layer_1"]
l2 = params["count_neuron_layer_2"]
l3 = params["count_neuron_layer_end"]

node_gnn = GNN(l1, l2, l3)
edge_linear = EdgesMLP(l3)

H_end = node_gnn(X, sp_A)
Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
E_pred = edge_linear(Omega)
loss = binary_cross_entropy(E_pred, E_true)

In [77]:
optimizer = torch.optim.Adam(
    list(node_gnn.parameters()) + list(edge_linear.parameters()),
    lr=learning_rate,
)
criterion = torch.nn.CrossEntropyLoss()

In [100]:
def list_batchs(dataset, batch_size):
    for i in range(0, len(dataset), batch_size):
        yield dataset[i:i+batch_size]

def split_train_val(dataset, val_split=0.2, shuffle=True, seed=1234):
    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(dataset)
    train_size = int(len(dataset) * (1 - val_split))
    train_dataset = dataset[:train_size]
    val_dataset = dataset[train_size:]
    return train_dataset, val_dataset

def train_model(params, models, dataset, path_save, save_frequency=5):  
    opt = torch.optim.Adam(
    list(models[0].parameters()) + list(models[1].parameters()),
    lr=learning_rate,
    )
    criterion = torch.nn.CrossEntropyLoss()
    loss_list = []
    train_dataset, val_dataset = split_train_val(dataset, val_split=0.1)
    for k in range(params["epochs"]):
        my_loss_list = []
        print("="*10, f"EPOCH #{k+1}","="*10)
        for l, batch in enumerate(list_batchs(train_dataset, params["batch_size"])):
            optimizer.zero_grad()
            my_loss_list = []
            for j, graph in enumerate(batch):
                i = graph["A"]
                v_in = [rev_dist(e) for e in graph["edges_feature"]]
                v_true = graph["true_edges"]
                x = graph["nodes_feature"]
                N = len(x)
                
                X = torch.Tensor(x)
                sp_A = torch.sparse_coo_tensor(i, v_in, (N, N))
                E_true = torch.Tensor([v_true]).T
            
                H_end = node_gnn(X, sp_A)
                Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
                E_pred = edge_linear(Omega)
                loss_ = criterion(E_pred, E_true)
                print(loss_)
                if j == 0:
                    loss = loss_
                else:
                    loss += loss_
                print(loss)
                my_loss_list.append(loss_.item())
            loss.backward()
            optimizer.step()
        loss_list.append(np.mean(my_loss_list))
        print(f"\nBatch # {l+1} loss={my_loss_list[-1]:.4f}" + " "*40)
        train_val = np.mean(my_loss_list)
        validation_val = validation(model, val_dataset)
        with open('log.txt', 'a') as f:
            f.write(f"EPOCH #{i}\t {train_val:.4f} (VAL: {validation_val:.4f})\n")  
        if i % save_frequency == 0:
            model.save(path_save+f"_{i//save_frequency}")
    model.save(path_save)


In [101]:
train_model(params, [node_gnn, edge_linear], dataset, "torch", save_frequency=5)

tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBackward0>)
tensor(-0., grad_fn=<DivBackward1>)
tensor(-0., grad_fn=<AddBack

RuntimeError: Found dtype Long but expected Float