In [9]:
!pip install torch torch-geometric

Collecting torch-geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting aiohttp (from torch-geometric)
  Using cached aiohttp-3.11.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting tqdm (from torch-geometric)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch-geometric)
  Using cached aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch-geometric)
  Using cached aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch-geometric)
  Using cached frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->torch-geometric)
  Using cached multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)
Col

In [1]:
import torch
from torch_geometric.nn import GCNConv
from torch.nn import Linear
from torch.nn.functional import relu, sigmoid, binary_cross_entropy
import numpy as np
import json

  from .autonotebook import tqdm as notebook_tqdm


In [87]:
class GNN(torch.nn.Module):
    def __init__(self,  layers):
        super(GNN, self).__init__()
        convs = []
        Bs = []
        for l_in, l_out in zip(layers[:-1], layers[1:]):
            convs.append(GCNConv(l_in, l_out, bias=False))
            torch.nn.init.normal_(convs[-1].lin.weight,mean=0.01, std=0.3)
            Bs.append(torch.nn.Linear(l_in, l_out, bias=False))
            torch.nn.init.normal_(Bs[-1].weight, mean=0.5, std=0.3)
        self.convs = torch.nn.ModuleList(convs)
        self.Bs = torch.nn.ModuleList(Bs)

    
    def forward(self, x: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
        for conv, B in zip(self.convs, self.Bs):
            x = conv(x, edge_index) -  B(x)
            x = relu(x)
        return x

class EdgesMLP(torch.nn.Module):
    def __init__(self, layers):
        super(EdgesMLP, self).__init__()
        linears = []
        for l_in, l_out in zip(layers[:-1], layers[1:]):
            linears.append(Linear(l_in, l_out, bias=False))
            torch.nn.init.normal_(linears[-1].weight, mean=0.5, std=0.3)
        self.linears = linears

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for linear in self.linears:
            x = linear(x)
            x = sigmoid(x)
        return torch.squeeze(x, 1)

def get_models(params):
    layers_gnn = params["count_neuron_layers_gnn"]
    layers_edge = params["count_neuron_layers_edge"]
    node_gnn = GNN(layers_gnn)
    edge_linear = EdgesMLP(layers_edge)
    return node_gnn, edge_linear

def list_batchs(dataset, batch_size):
    for i in range(0, len(dataset), batch_size):
        yield dataset[i:i+batch_size]

def get_tensor_from_graph(graph):
    i = graph["A"]
    v_in = [rev_dist(e) for e in graph["edges_feature"]]
    v_true = graph["true_edges"]
    x = graph["nodes_feature"]
    N = len(x)
    
    X = torch.tensor(data=x, dtype=torch.float32)
    sp_A = torch.sparse_coo_tensor(indices=i, values=v_in, size=(N, N), dtype=torch.float32)
    E_true = torch.tensor(data=v_true, dtype=torch.float32)
    return X, sp_A, E_true, i

def validation(models, dataset, criterion):
    my_loss_list = []
    for j, graph in enumerate(dataset):
        X, sp_A, E_true, i = get_tensor_from_graph(graph)
        H_end = models[0](X, sp_A)
        Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
        E_pred = models[1](Omega)
        loss = criterion(E_pred, E_true)
        my_loss_list.append(loss.item())
        print(f"{(j+1)/len(dataset)*100:.2f} % loss = {my_loss_list[-1]:.5f} {' '*30}", end='\r')
    return np.mean(my_loss_list)

def split_train_val(dataset, val_split=0.2, shuffle=True, seed=1234):
    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(dataset)
    train_size = int(len(dataset) * (1 - val_split))
    train_dataset = dataset[:train_size]
    val_dataset = dataset[train_size:]
    return train_dataset, val_dataset

def train_step(models, batch, optimizer, criterion):
    optimizer.zero_grad()
    my_loss_list = []
   
    for j, graph in enumerate(batch):
        X, sp_A, E_true, i = get_tensor_from_graph(graph)
        H_end = models[0](X, sp_A)
        Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
        E_pred = models[1](Omega)
        loss = criterion(E_pred, E_true)
        my_loss_list.append(loss.item())
        print(f"Batch loss={my_loss_list[-1]:.4f}" + " "*40, end="\r")
        loss.backward()
    optimizer.step()
    return np.mean(my_loss_list)

def train_model(params, models, dataset, path_save, save_frequency=5, restart=False):  
    optimizer = torch.optim.Adam(
    list(models[0].parameters()) + list(models[1].parameters()),
    lr=learning_rate,
    )
    criterion = torch.nn.BCELoss()
    loss_list = []
    with open('log.txt', 'a') as f:
        for key, val in params.items():
            f.write(f"{key}:\t{val}\n")
    train_dataset, val_dataset = split_train_val(dataset, val_split=0.1)
    for k in range(params["epochs"]):
        my_loss_list = []
        
        for l, batch in enumerate(list_batchs(train_dataset, params["batch_size"])):
            batch_loss = train_step(models, batch, optimizer, criterion)
            my_loss_list.append(batch_loss)
            print(f"Batch # {l+1} loss={my_loss_list[-1]:.4f}" + " "*40)
        train_val = np.mean(my_loss_list)
        loss_list.append(train_val)
        validation_val = validation(models, val_dataset, criterion)
        print("="*10, f"EPOCH #{k+1}","="*10, f"({train_val:.4f}/{validation_val:.4f})")
        
        # TODO: DELETE RESTART
        if restart and k>=2 and abs(loss_list[k] - loss_list[k-1]) < 0.001:
            return True
            
            
        with open('log.txt', 'a') as f:
            f.write(f"EPOCH #{k}\t {train_val:.8f} (VAL: {validation_val:.8f})\n")  
        if (k+1) % save_frequency == 0:
            num = k//save_frequency
            torch.save(models[0].state_dict(), path_save+f"_node_gnn_{num}")
            torch.save(models[1].state_dict(), path_save+f"_edge_linear_{num}")
    torch.save(models[0].state_dict(), path_save+f"_node_gnn_end")
    torch.save(models[1].state_dict(), path_save+f"_edge_linear_end")
    return False # For restart


In [2]:
import json
# with open("../dataset.json", "r") as f:
#     dataset = json.load(f)['dataset']
with open("/home/daniil/pager_11000_4N_seg.json", "r") as f:
    dataset = json.load(f)['dataset']

print("DATASET INFO:")
print("count row:", len(dataset))
print("first:", dataset[0].keys())
print(f"\t A:", np.shape(dataset[0]["A"]))
print(f"\t nodes_feature:", np.shape(dataset[0]["nodes_feature"]))
print(f"\t edges_feature:", np.shape(dataset[0]["edges_feature"]))
print(f"\t true_edges:", np.shape(dataset[0]["true_edges"]))
print("end:", dataset[-1].keys())
print(f"\t A:", np.shape(dataset[-1]["A"]))
print(f"\t nodes_feature:", np.shape(dataset[-1]["nodes_feature"]))
print(f"\t edges_feature:", np.shape(dataset[-1]["edges_feature"]))
print(f"\t true_edges:", np.shape(dataset[-1]["true_edges"]))


DATASET INFO:
count row: 11900
first: dict_keys(['A', 'nodes_feature', 'edges_feature', 'true_edges'])
	 A: (2, 925)
	 nodes_feature: (451, 9)
	 edges_feature: (925,)
	 true_edges: (925,)
end: dict_keys(['A', 'nodes_feature', 'edges_feature', 'true_edges'])
	 A: (2, 1597)
	 nodes_feature: (778, 9)
	 edges_feature: (1597,)
	 true_edges: (1597,)


In [3]:
def rev_dist(a):
    if a==0:
        return 0
    else:
        return 1/a
        
i = dataset[0]["A"]
v_in = [rev_dist(e) for e in dataset[0]["edges_feature"]]
v_true = dataset[0]["true_edges"]
x = dataset[0]["nodes_feature"]
N = len(x)

X = torch.Tensor(x)
sp_A = torch.sparse_coo_tensor(i, v_in, (N, N))
E_true = torch.Tensor(v_true)

In [4]:
params = {
    "count_neuron_layers_gnn": [9, 27, 18],
    "count_neuron_layers_edge": [18*2, 1],
    "epochs": 30,
    "batch_size": 400,
}

learning_rate = 0.05

node_gnn, edge_linear = get_models(params)


optimizer = torch.optim.Adam(
    list(node_gnn.parameters()) + list(edge_linear.parameters()),
    lr=learning_rate,
)
criterion = torch.nn.BCELoss()


H_end = node_gnn(X, sp_A)
Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
E_pred = edge_linear(Omega)
print(f"E_pred:\n{E_pred}", f"\nE_true:\n{E_true}")
print("Loss = ", criterion(E_pred, E_true))

del optimizer, criterion

NameError: name 'get_models' is not defined

In [9]:
# restart = True
# num_test = 0
# while restart:
#     num_test += 1
#     print("=/"*10 + "NUMTEST " + str(num_test) + " " + "=/"*30)
models = get_models(params)
restart = train_model(params, models, dataset, f"{num_test}_deep_torch_11000", save_frequency=5, restart=False)

Batch # 1 loss=1.7818                                        
Batch # 2 loss=4.8147                                        
Batch # 3 loss=2.3238                                        
Batch # 4 loss=0.6592                                        
Batch # 5 loss=0.6754                                        
Batch # 6 loss=0.6888                                        
Batch # 7 loss=0.6781                                        
Batch # 8 loss=0.6668                                        
Batch # 9 loss=0.6686                                        
Batch # 10 loss=0.6430                                        
Batch # 11 loss=0.6633                                        
Batch # 12 loss=0.6160                                        
Batch # 13 loss=0.6769                                        
Batch # 14 loss=0.6758                                        
Batch # 15 loss=0.6768                                        
Batch # 16 loss=0.6778                                        
B

In [None]:
def load_weigths(models, path_node_gnn, path_edge_linear):
    models[0].load_state_dict(torch.load(path_node_gnn, weights_only=True))
    models[1].load_state_dict(torch.load(path_edge_linear, weights_only=True))

In [20]:
models_load = get_models(params)
load_weigths(models_load, "deep_torch_11000_node_gnn_end", "deep_torch_11000_edge_linear_end")

In [21]:
def torch_classification_edges(models, graph, k=0.51):
    i = graph["A"]
    v_in = [rev_dist(e) for e in graph["edges_feature"]]
    x = graph["nodes_feature"]
    N = len(x)
    X = torch.tensor(data=x, dtype=torch.float32)
    sp_A = torch.sparse_coo_tensor(indices=i, values=v_in, size=(N, N), dtype=torch.float32)
    
    H_end = models[0](X, sp_A)
    Omega = torch.cat([H_end[i[0]], H_end[i[1]]],dim=1)
    E_pred = models[1](Omega)
    a = np.zeros(E_pred.shape)
    return E_pred
    a[E_pred>k] = 1.0
    return a

In [25]:
mask = torch_classification_edges(models, dataset[2], )

In [26]:
sum(mask == np.array(dataset[2]['true_edges']))/ len(mask)

tensor(0.)

In [None]:
# 0.90 - для 1500 изображений
#

In [29]:
for i in dataset[2]['true_edges']:
    print(i)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
1
0
1
1
0
1
1
0
1
1
0
1
1
0
1
1
0
1
1
0
1
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


# asGLAM

In [5]:
from torch_geometric.nn import BatchNorm, TAGConv
from torch.nn import Dropout

In [6]:
class NodeGLAM(torch.nn.Module):
    def __init__(self,  input_, output_):
        super(NodeGLAM, self).__init__()
        self.batch_norm1 = BatchNorm(input_)
        
        h1 = 16                                 #<------------------------
        self.linear1 = Linear(input_, h1) 
        h2 = 16                                 #<------------------------
        self.tag1 = TAGConv(h1, h2)
        
        h3 = 16                                 #<------------------------
        self.linear2 = Linear(h2, h3) 
        h4 = 16                                 #<------------------------
        self.tag2 = TAGConv(h3, h4)

        h5 = 16                                 #<------------------------
        self.linear3 = Linear(h4+input_, h5)
        h6 = 16                                 #<------------------------
        self.linear4 =Linear(h5, output_)

    
    def forward(self, x: torch.Tensor, edge_index: torch.Tensor) -> torch.Tensor:
        x = self.batch_norm1(x)
        h = self.linear1(x)
        h = relu(h)
        h = self.tag1(h, edge_index)
        h = relu(h)
        
        h = self.linear2(h)
        h = relu(h)
        h = self.tag2(h, edge_index)
        h = relu(h)
        a = torch.cat([x, h], dim=1)
        a = self.linear3(a)
        a = relu(a)
        a = self.linear4(a)
        return torch.softmax(a, dim=-1)

class EdgeGLAM(torch.nn.Module):
    def __init__(self, input_, output_):
        super(EdgeGLAM, self).__init__()
        self.batch_norm2 = BatchNorm(input_, output_)
        h1 = 16                                 #<------------------------
        self.linear1 = Linear(input_, h1) 
        self.linear2 = Linear(h1, output_)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.batch_norm2(x)
        h = self.linear1(x)
        h = relu(h)
        h = self.linear2(h)
        h = torch.sigmoid(h)
        return torch.squeeze(h, 1)

In [7]:
node_glam = NodeGLAM(9, 5)
edge_glam = EdgeGLAM(2*9+2*5, 1)

In [9]:
def list_batchs(dataset, batch_size):
    for i in range(0, len(dataset), batch_size):
        yield dataset[i:i+batch_size]

def get_tensor_from_graph(graph):
    i = graph["A"]
    v_in = [rev_dist(e) for e in graph["edges_feature"]]
    v_true = graph["true_edges"]
    x = graph["nodes_feature"]
    N = len(x)
    
    X = torch.tensor(data=x, dtype=torch.float32)
    sp_A = torch.sparse_coo_tensor(indices=i, values=v_in, size=(N, N), dtype=torch.float32)
    E_true = torch.tensor(data=v_true, dtype=torch.float32)
    return X, sp_A, E_true, i

def validation(models, dataset, criterion):
    my_loss_list = []
    for j, graph in enumerate(dataset):
        X, sp_A, E_true, i = get_tensor_from_graph(graph)
        Node_emb = models[0](X, sp_A)
        Omega = torch.cat([Node_emb[i[0]],Node_emb[i[1]], X[i[0]], X[i[1]]],dim=1)
        E_pred = models[1](Omega)
        loss = criterion(E_pred, E_true)
        my_loss_list.append(loss.item())
        print(f"{(j+1)/len(dataset)*100:.2f} % loss = {my_loss_list[-1]:.5f} {' '*30}", end='\r')
    return np.mean(my_loss_list)

def split_train_val(dataset, val_split=0.2, shuffle=True, seed=1234):
    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(dataset)
    train_size = int(len(dataset) * (1 - val_split))
    train_dataset = dataset[:train_size]
    val_dataset = dataset[train_size:]
    return train_dataset, val_dataset

def train_step(models, batch, optimizer, criterion):
    optimizer.zero_grad()
    my_loss_list = []
   
    for j, graph in enumerate(batch):
        X, sp_A, E_true, i = get_tensor_from_graph(dataset[0])
        Node_emb = models[0](X, sp_A)
        Omega = torch.cat([Node_emb[i[0]],Node_emb[i[1]], X[i[0]], X[i[1]]],dim=1)
        E_pred = models[1](Omega)
        loss = criterion(E_pred, E_true)
        my_loss_list.append(loss.item())
        print(f"Batch loss={my_loss_list[-1]:.4f}" + " "*40, end="\r")
        loss.backward()
    optimizer.step()
    return np.mean(my_loss_list)

def train_model(params, models, dataset, path_save, save_frequency=5, restart=False):  
    optimizer = torch.optim.Adam(
    list(models[0].parameters()) + list(models[1].parameters()),
    lr=params["learning_rate"],
    )
    criterion = torch.nn.BCELoss()
    loss_list = []
    with open('log.txt', 'a') as f:
        for key, val in params.items():
            f.write(f"{key}:\t{val}\n")
    train_dataset, val_dataset = split_train_val(dataset, val_split=0.1)
    for k in range(params["epochs"]):
        my_loss_list = []
        
        for l, batch in enumerate(list_batchs(train_dataset, params["batch_size"])):
            batch_loss = train_step(models, batch, optimizer, criterion)
            my_loss_list.append(batch_loss)
            print(f"Batch # {l+1} loss={my_loss_list[-1]:.4f}" + " "*40)
        train_val = np.mean(my_loss_list)
        loss_list.append(train_val)
        validation_val = validation(models, val_dataset, criterion)
        print("="*10, f"EPOCH #{k+1}","="*10, f"({train_val:.4f}/{validation_val:.4f})")
        
        # TODO: DELETE RESTART
        if restart and k>=2 and abs(loss_list[k] - loss_list[k-1]) < 0.001:
            return True
            
            
        with open('log.txt', 'a') as f:
            f.write(f"EPOCH #{k}\t {train_val:.8f} (VAL: {validation_val:.8f})\n")  
        if (k+1) % save_frequency == 0:
            num = k//save_frequency
            torch.save(models[0].state_dict(), path_save+f"_node_gnn_{num}")
            torch.save(models[1].state_dict(), path_save+f"_edge_linear_{num}")
    torch.save(models[0].state_dict(), path_save+f"_node_gnn_end")
    torch.save(models[1].state_dict(), path_save+f"_edge_linear_end")
    return False # For restart

In [10]:
X, sp_A, E_true, i = get_tensor_from_graph(dataset[0])
Node_emb = node_glam(X, sp_A)
Omega = torch.cat([Node_emb[i[0]],Node_emb[i[1]], X[i[0]], X[i[1]]],dim=1)
Edge_emb = edge_glam(Omega)

Edge_emb.shape,X.shape

  src = src.to_sparse_csr()


(torch.Size([925]), torch.Size([451, 9]))

In [11]:
params = {
    "epochs": 30,
    "batch_size": 100,
    "learning_rate": 0.05
}
node_glam = NodeGLAM(9, 5)
edge_glam = EdgeGLAM(2*9+2*5, 1)
path_save = "deep_torch_11000_as_glam"
criterion = torch.nn.BCELoss()
train_model(params, [node_glam, edge_glam], dataset, path_save, save_frequency=10, restart=False)

Batch # 1 loss=0.7919                                        
Batch # 2 loss=0.6893                                        
Batch # 3 loss=0.5681                                        
Batch # 4 loss=0.4458                                        
Batch # 5 loss=0.3943                                        
Batch # 6 loss=0.4412                                        
Batch # 7 loss=0.4576                                        
Batch # 8 loss=0.4256                                        
Batch # 9 loss=0.3860                                        
Batch # 10 loss=0.3658                                        
Batch # 11 loss=0.3670                                        
Batch # 12 loss=0.3707                                        
Batch # 13 loss=0.3720                                        
Batch # 14 loss=0.3664                                        
Batch # 15 loss=0.3569                                        
Batch # 16 loss=0.3489                                        
B

KeyboardInterrupt: 

In [107]:
Omega.shape

torch.Size([925, 28])