In [57]:
import torch
from torch import nn
import torch.nn.functional as F
import networkx as nx
import matplotlib.pyplot as plt
import torch.optim as optim
from scipy import sparse as sp
import random
from graphviz import Graph
import pickle
import numpy as np

device = torch.device('cuda')

In [74]:
# Decoder
class Decoder(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features, n_heads, d_h):
        super(Decoder, self).__init__()
        self.n_heads = n_heads
        self.hidden_features = hidden_features
        self.d_h = d_h

        self.linear1 = nn.Linear(in_features, hidden_features)
        self.linear2 = nn.Linear(hidden_features, out_features)
        self.attn_linear1 = nn.Linear(hidden_features, d_h)
        self.attn_linear2 = nn.Linear(hidden_features, d_h)
        self.softmax = nn.Softmax(dim=1)
        self.activation = nn.Tanh()

    def forward(self, x, v_prev, neighbors):
        n_nodes = x.shape[0]

        v_prev = self.linear1(v_prev)
        v_prev = v_prev.unsqueeze(0).repeat(n_nodes, 1)

        neighbors = self.linear1(neighbors)

        attn_input = torch.cat([v_prev, neighbors], dim=-1)
        attn_input = self.activation(attn_input)

        attn1 = self.attn_linear1(attn_input)
        attn2 = self.attn_linear2(attn_input)

        attn_output = torch.matmul(attn1, attn2.transpose(0, 1)) / (self.d_h ** 0.5)
        attn_output = self.activation(attn_output)

        masked_attn_output = attn_output.masked_fill(neighbors == 0, float('-inf'))
        attn_weights = self.softmax(masked_attn_output)

        x = self.linear2(x)
        x = x.unsqueeze(0).repeat(n_nodes, 1, 1)

        output = torch.matmul(attn_weights.unsqueeze(1), x)
        output = output.squeeze(1)

        return output, attn_weights

In [75]:
# Encoder
class GraphAttentionLayer(torch.nn.Module):
    def __init__(self, in_features, out_features, n_heads, is_concat = True, dropout = 0.6, leacky_relu_negative_slope = 0.2):
        super(GraphAttentionLayer, self).__init__()
        self.W = torch.nn.Parameter(torch.randn(in_features, out_features))
        self.is_concat = is_concat
        self.n_heads = n_heads

        if is_concat:
            assert out_features % n_heads == 0

            self.n_hidden = out_features // n_heads
        else:
            self.n_hidden = out_features

        self.linear = nn.Linear(in_features, self.n_hidden * n_heads, bias = False)

        self.attn = nn.Linear(self.n_hidden * 2, 1, bias = False)
        self.activation = nn.LeakyReLU(negative_slope = leacky_relu_negative_slope)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(dropout) 

    def forward(self, x, adj):
        n_nodes = x.shape[0]
        g=self.linear(x).view(n_nodes, self.n_heads, self.n_hidden)
        g_repeat = g.repeat(n_nodes, 1,1)
        g_repeat_interleave = g.repeat_interleave(n_nodes, dim=0)
        g_concat = torch.cat([g_repeat_interleave, g_repeat], dim = -1)
        g_concat = g_concat.view(n_nodes, n_nodes, self.n_heads, 2 * self.n_hidden)
        e = self.activation(self.attn(g_concat))
        e = e.squeeze(-1)
        assert adj.shape[0] == 1 or adj.shape[0] == n_nodes
        assert adj.shape[1] == 1 or adj.shape[1] == n_nodes
        assert adj.shape[2] == 1 or adj.shape[2] == self.n_heads
        e=e.masked_fill(adj == 0, 1)
        a = self.softmax(e)
        a = self.dropout(a)
        attn_res = torch.einsum('ijh,jhf->ihf', a, g)
        if self.is_concat:
            return attn_res.reshape(n_nodes, self.n_heads * self.n_hidden)
        else:
            return attn_res.mean(dim = 1)


In [76]:
class GAT(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features, n_heads, d_h):
        super(GAT, self).__init__()
        self.n_heads = n_heads
        self.attention1 = GraphAttentionLayer(in_features, hidden_features, n_heads)
        self.attention2 = GraphAttentionLayer(hidden_features, out_features, n_heads)
        self.norm= nn.LayerNorm(out_features)
        self.decoder = Decoder(out_features, hidden_features, out_features, n_heads, d_h)
    
    def forward(self, x, adj):
        x = self.attention1(x, adj)
        x = self.attention2(x, adj)
        x = self.norm(x)
        x = F.softmax(x, dim=-1)
        return x
    
    def decode(self, x, v_prev, neighbors):
        return self.decoder(x, v_prev, neighbors)

In [80]:
def generate_random_weighted_graph(num_nodes, num_edges, max_weight=10):
    # 방향 그래프 생성
    graph = nx.Graph()
    
    # 노드 추가
    nodes = range(num_nodes)
    graph.add_nodes_from(nodes)
    
    # 간선 추가
    edges = []
    for i in range(num_edges):
        # 임의의 출발 노드와 도착 노드 선택
        source = random.choice(nodes)
        target = random.choice(nodes)
        
        # 출발 노드와 도착 노드가 같은 경우 건너뜀
        if source == target:
            continue
        
        # 가중치 랜덤 생성
        weight = random.randint(1, max_weight)
        
        # 간선 추가
        edges.append((source, target, weight))

    adj_matrix = nx.adjacency_matrix(graph)
    adj_matrix = adj_matrix + sp.eye(adj_matrix.shape[0]) # Add self-loop
    adj_tensor = torch.Tensor(adj_matrix.todense())

    in_features = 8
    x = torch.randn(num_nodes, in_features)

    adj_tensor = adj_tensor.unsqueeze(0)
    adj_tensor = adj_tensor.repeat(num_nodes, 1, 1)
    adj_tensor = adj_tensor.transpose(0,1)
        
    graph.add_weighted_edges_from(edges)
    
    return graph, x, adj_tensor

In [78]:
num_graphs = 100
output_file = 'random_undirected_graphs.pkl'

graphs = []

for _ in range(num_graphs):
    num_nodes, num_edges, max_weight = np.random.randint(1,20), np.random.randint(1,30), np.random.randint(1,30)
    graph, x, adj_tensor = generate_random_weighted_graph(num_nodes, num_edges, max_weight)
    graphs.append((x, adj_tensor))


# 그래프를 pickle 파일로 저장
with open(output_file, 'wb') as f:
    pickle.dump(graphs, f)

[(<networkx.classes.graph.Graph object at 0x0000027B38B96C40>, tensor([[ 1.7706, -1.2055, -0.3704, -0.7686,  0.0884, -1.5066, -0.4840, -0.4250],
        [ 0.8159, -0.7124,  0.5799, -0.6417,  1.3405, -0.0508,  1.2809, -0.6956],
        [ 0.4837, -0.6585,  1.6075, -1.2979,  0.7519, -0.9513, -2.7687,  1.1365],
        [ 0.7504,  0.1062, -0.9272, -0.4088,  0.7303, -0.4356, -0.6174, -1.9409],
        [ 0.7466,  0.1042, -0.9720,  0.9513,  0.3627,  0.2547, -0.2532, -1.0389],
        [-0.3728, -0.2352,  1.5166,  2.8446,  1.7003,  0.0339,  0.5677,  0.8085],
        [-1.1359,  0.0073,  0.2792,  0.4503, -1.7104,  0.7086,  0.3389,  0.1451],
        [ 0.0167,  0.4460, -1.2574, -0.0463, -0.1716, -0.1953, -1.6959, -0.5654],
        [ 0.5201,  0.9539,  0.3218,  0.4792,  0.0551,  0.1299, -0.1678,  0.0597],
        [-1.4824, -0.3210, -0.8946, -0.6142,  0.8668, -0.8864,  0.3181, -0.8725],
        [ 0.3666,  0.3337,  0.6815, -1.6004, -1.5428,  0.7039,  0.2321, -0.6248],
        [-0.8082,  0.2687,  0.9784,

In [None]:
# pickle 파일에서 그래프 데이터 로드
with open('random_undirected_graphs.pkl', 'rb') as f:
    graphs = pickle.load(f)

In [89]:
gat_models = []
for graph_idx, (graph, x, adj_tensor) in enumerate(graphs):
    in_features = x.shape[1]
    n_heads = adj_tensor.shape[2]
    hidden_features = 4 * n_heads
    out_features = 2 * n_heads
    d_h = 4 * n_heads
    gat_model = GAT(in_features, hidden_features, out_features, n_heads, d_h).cuda()
    gat_models.append(gat_model)
    x = x.cuda()
    adj_tensor = adj_tensor.cuda()
    output = gat_model(x, adj_tensor)
    print(f"Graph {i+1} - Output:")
    print(output)
    #output : 각 노드에 대한 클래스 라벨 예측 값


Graph 1 - Output:
tensor([[0.0315, 0.0252, 0.0476, 0.0362, 0.0064, 0.0167, 0.0782, 0.0782, 0.0170,
         0.0529, 0.0250, 0.0116, 0.2265, 0.0175, 0.0083, 0.0825, 0.0326, 0.0198,
         0.0032, 0.0133, 0.0555, 0.0024, 0.0176, 0.0294, 0.0274, 0.0061, 0.0235,
         0.0081],
        [0.0347, 0.0257, 0.0283, 0.0493, 0.0130, 0.0188, 0.2243, 0.1022, 0.0244,
         0.0311, 0.0144, 0.0113, 0.1058, 0.0245, 0.0167, 0.0624, 0.0133, 0.0176,
         0.0027, 0.0079, 0.0200, 0.0024, 0.0184, 0.0104, 0.0307, 0.0288, 0.0560,
         0.0047],
        [0.0391, 0.0178, 0.0212, 0.0442, 0.0134, 0.0156, 0.1405, 0.1905, 0.0357,
         0.0155, 0.0158, 0.0131, 0.0687, 0.0497, 0.0098, 0.0967, 0.0153, 0.0138,
         0.0062, 0.0053, 0.0634, 0.0015, 0.0125, 0.0132, 0.0240, 0.0166, 0.0244,
         0.0165],
        [0.0598, 0.0217, 0.0548, 0.0675, 0.0146, 0.0056, 0.0533, 0.0276, 0.0376,
         0.0222, 0.0305, 0.0134, 0.2417, 0.0412, 0.0096, 0.0551, 0.0307, 0.0227,
         0.0012, 0.0130, 0.0366, 0.00

In [92]:
# Set the optimizer and loss function
optimizer = optim.Adam(gat_model.parameters(), lr=0.01)
criterion = torch.nn.NLLLoss()

# Move the model and loss function to the GPU
gat_model = gat_model.cuda()
criterion = criterion.cuda()

# Training loop
epochs = 100

for epoch in range(epochs):
    total_loss = 0.0
    for graph_idx, (graph, x, adj_tensor) in enumerate(graphs):
        x = x.to(device)
        adj_tensor = adj_tensor.to(device)
         # Generate random labels for the current graph
        num_nodes = x.shape[0]
        labels = torch.tensor([random.randint(0, 1) for _ in range(num_nodes)]).to(device)
                
        # Zero the gradients
        gat_model.zero_grad()
        
        # Forward pass
        output = gat_models[graph_idx](x, adj_tensor)
        
        # Compute the loss
        loss = criterion(output, labels)
        total_loss += loss.item()
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
    
    # Calculate the average loss for the epoch
    average_loss = total_loss / len(graphs)
    
    for graph_idx in range(len(graphs)):
        print("Graph {}: Epoch: {:03d}, Loss: {:.4f}".format(graph_idx+1, epoch+1, average_loss))

Graph 1: Epoch: 001, Loss: -0.1192
Graph 2: Epoch: 001, Loss: -0.1192
Graph 3: Epoch: 001, Loss: -0.1192
Graph 4: Epoch: 001, Loss: -0.1192
Graph 5: Epoch: 001, Loss: -0.1192
Graph 6: Epoch: 001, Loss: -0.1192
Graph 7: Epoch: 001, Loss: -0.1192
Graph 8: Epoch: 001, Loss: -0.1192
Graph 9: Epoch: 001, Loss: -0.1192
Graph 10: Epoch: 001, Loss: -0.1192
Graph 11: Epoch: 001, Loss: -0.1192
Graph 12: Epoch: 001, Loss: -0.1192
Graph 13: Epoch: 001, Loss: -0.1192
Graph 14: Epoch: 001, Loss: -0.1192
Graph 15: Epoch: 001, Loss: -0.1192
Graph 16: Epoch: 001, Loss: -0.1192
Graph 17: Epoch: 001, Loss: -0.1192
Graph 18: Epoch: 001, Loss: -0.1192
Graph 19: Epoch: 001, Loss: -0.1192
Graph 20: Epoch: 001, Loss: -0.1192
Graph 21: Epoch: 001, Loss: -0.1192
Graph 22: Epoch: 001, Loss: -0.1192
Graph 23: Epoch: 001, Loss: -0.1192
Graph 24: Epoch: 001, Loss: -0.1192
Graph 25: Epoch: 001, Loss: -0.1192
Graph 26: Epoch: 001, Loss: -0.1192
Graph 27: Epoch: 001, Loss: -0.1192
Graph 28: Epoch: 001, Loss: -0.1192
G

참고
https://chioni.github.io/posts/gat/

In [93]:
# 그래프 데이터로 모델 학습
for graph_idx, (graph, x, adj_tensor) in enumerate(graphs):
    # Initialize the GAT model for the current graph
    in_features = x.shape[1]
    n_heads = adj_tensor.shape[1]
    hidden_features = 4 * n_heads
    out_features = 2 * n_heads
    d_h = 4 * n_heads
    gat_model = GAT(in_features, hidden_features, out_features, n_heads, d_h).to(device)

    # Set the optimizer and loss function
    optimizer = optim.Adam(gat_model.parameters(), lr=0.01)
    criterion = torch.nn.NLLLoss().to(device)

    # Move the feature matrix and adjacency tensor to the GPU
    x = x.to(device)
    adj_tensor = adj_tensor.to(device)

    # Training loop
    epochs = 100
    for epoch in range(epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        output = gat_model(x, adj_tensor)

        # Generate random labels for the current graph
        num_nodes = x.shape[0]
        labels = torch.tensor([random.randint(0, 1) for _ in range(num_nodes)]).to(device)

        # Compute the loss
        loss = criterion(output, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        print("Graph {}: Epoch: {:03d}, Loss: {:.4f}".format(graph_idx+1, epoch+1, loss.item()))

Graph 1: Epoch: 001, Loss: -0.0254
Graph 1: Epoch: 002, Loss: -0.1674
Graph 1: Epoch: 003, Loss: -0.3029
Graph 1: Epoch: 004, Loss: -0.3608
Graph 1: Epoch: 005, Loss: -0.3142
Graph 1: Epoch: 006, Loss: -0.4000
Graph 1: Epoch: 007, Loss: -0.5569
Graph 1: Epoch: 008, Loss: -0.4095
Graph 1: Epoch: 009, Loss: -0.3047
Graph 1: Epoch: 010, Loss: -0.4282
Graph 1: Epoch: 011, Loss: -0.4983
Graph 1: Epoch: 012, Loss: -0.6975
Graph 1: Epoch: 013, Loss: -0.4644
Graph 1: Epoch: 014, Loss: -0.5200
Graph 1: Epoch: 015, Loss: -0.2665
Graph 1: Epoch: 016, Loss: -0.6714
Graph 1: Epoch: 017, Loss: -0.4025
Graph 1: Epoch: 018, Loss: -0.4637
Graph 1: Epoch: 019, Loss: -0.6675
Graph 1: Epoch: 020, Loss: -0.4056
Graph 1: Epoch: 021, Loss: -0.5430
Graph 1: Epoch: 022, Loss: -0.5476
Graph 1: Epoch: 023, Loss: -0.4104
Graph 1: Epoch: 024, Loss: -0.6197
Graph 1: Epoch: 025, Loss: -0.2773
Graph 1: Epoch: 026, Loss: -0.2756
Graph 1: Epoch: 027, Loss: -0.5531
Graph 1: Epoch: 028, Loss: -0.3454
Graph 1: Epoch: 029,