In [1]:
import networkx as nx

In [2]:
def obtain_metrics(G, compute_distances=False):
    """Print basic size and connectivity statistics of an undirected graph.

    Args:
        G: A NetworkX graph. ``nx.number_connected_components`` is called on
            it, so it must be undirected.
        compute_distances: When true, additionally print the diameter and the
            average shortest-path length of the largest connected component.
            Off by default, matching the original behaviour where these two
            metrics were commented out.

    Returns:
        None. All results are written to standard output.
    """
    n_nodes = G.number_of_nodes()
    n_edges = G.number_of_edges()

    # Every undirected edge adds one to the degree of two nodes. An empty
    # graph has no meaningful mean degree; report 0.0 instead of dividing by 0.
    mean_degree = (2 * n_edges) / n_nodes if n_nodes else 0.0

    print(f"La red tiene {n_nodes} nodos")
    print(f"La red tiene {nx.number_connected_components(G)} componentes")
    print(f"La red tiene {n_edges} aristas")
    print(f"El grado promedio de la red es {mean_degree}")

    if compute_distances and n_nodes:
        # Distances are only defined inside a connected graph, so they are
        # measured on the largest connected component.
        largest_component = max(nx.connected_components(G), key=len)
        subgraph = G.subgraph(largest_component)
        print("Diámetro de la red:", nx.diameter(subgraph))
        print("Distancia promedio en la componente más grande:",
              nx.average_shortest_path_length(subgraph))

In [3]:
import random, numpy as np, torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.nn import TransformerConv, GlobalAttention
from torch_geometric.utils import to_dense_adj, degree

from dataclasses import dataclass

import scipy.sparse as sp
from scipy.sparse.linalg import eigsh

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def set_seed(seed: int = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    autotuning disabled.

    Args:
        seed: Value passed to every seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # cuDNN benchmark mode picks kernels at runtime; turn it off together
    # with enabling deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


In [5]:
def laplacian_pe(edge_index, num_nodes, k: int, device):
    """Compute Laplacian positional encodings for the nodes of a graph.

    Builds the symmetric-normalised Laplacian ``L = I - D^{-1/2} A D^{-1/2}``
    from ``edge_index`` and returns the eigenvectors belonging to its ``k``
    smallest eigenvalues, one row per node.

    Args:
        edge_index: Tensor indexed as ``edge_index[0]`` (row indices) and
            ``edge_index[1]`` (column indices). It is expected to list every
            undirected edge in both directions: ``eigsh`` assumes a symmetric
            matrix and this function does not symmetrise the input. Repeated
            (row, col) pairs are summed when the matrix is built.
        num_nodes: Number of nodes ``N``.
        k: Number of encoding dimensions requested.
        device: Device the returned tensor is moved to.

    Returns:
        Float32 tensor of shape ``(num_nodes, k)``. Columns are ordered by
        ascending eigenvalue. When the graph has fewer than ``k`` nodes the
        trailing columns are zero, so the width always equals ``k`` and
        matches what a downstream layer sized with ``k`` expects. The sign of
        each eigenvector is arbitrary.
    """
    k = max(int(k), 0)
    if num_nodes == 0 or k == 0:
        # Nothing to decompose; return an empty encoding of the right shape.
        return torch.zeros((num_nodes, k), dtype=torch.float32, device=device)

    # Sparse adjacency matrix with unit weights.
    row = edge_index[0].cpu().numpy()
    col = edge_index[1].cpu().numpy()
    weights = np.ones(len(row), dtype=np.float32)
    A = sp.coo_matrix((weights, (row, col)), shape=(num_nodes, num_nodes)).tocsr()

    # Normalised Laplacian: L = I - D^{-1/2} A D^{-1/2}.
    deg = np.asarray(A.sum(axis=1)).ravel()
    deg[deg == 0] = 1.0  # isolated nodes: avoid dividing by zero
    D_inv_sqrt = sp.diags(1.0 / np.sqrt(deg))
    L = sp.eye(num_nodes, format="csr") - D_inv_sqrt @ A @ D_inv_sqrt

    n_vecs = min(k, num_nodes)
    if n_vecs < num_nodes:
        # L is positive semi-definite, so the smallest algebraic eigenvalues
        # ('SA') are also the smallest in magnitude.
        vals, vecs = eigsh(L, k=n_vecs, which="SA")
    else:
        # eigsh refuses k >= N on sparse input; the matrix is small here, so
        # a dense full decomposition is cheap.
        vals, vecs = np.linalg.eigh(L.toarray())

    # Make the column order explicit rather than relying on solver output.
    vecs = vecs[:, np.argsort(vals)]

    U = torch.zeros((num_nodes, k), dtype=torch.float32)
    U[:, :n_vecs] = torch.from_numpy(np.ascontiguousarray(vecs)).float()
    return U.to(device)  # (N, k)

In [6]:
def degree_feature(edge_index, num_nodes, device):
    """Return ``log(1 + degree)`` of every node as a column vector.

    Args:
        edge_index: Tensor whose first row, ``edge_index[0]``, is counted to
            obtain node degrees. This equals the node degree when every
            undirected edge is listed in both directions.
        num_nodes: Number of nodes; fixes the length of the result.
        device: Device the result is moved to. ``None`` leaves the result on
            the device of ``edge_index``.

    Returns:
        Tensor of shape ``(num_nodes, 1)``.
    """
    deg = degree(edge_index[0], num_nodes=num_nodes)
    feat = torch.log1p(deg).unsqueeze(-1)
    # The device argument used to be ignored; honour it so the feature can be
    # concatenated with tensors created on that device.
    if device is not None:
        feat = feat.to(device)
    return feat

In [7]:
class GraphormerLiteEncoder(nn.Module):
    """Stack of TransformerConv layers over node features, LapPE and degree.

    The node features, the positional encoding and the degree feature are
    concatenated, projected to ``d_model`` channels and passed through
    ``n_layers`` residual blocks (conv -> ReLU -> dropout -> add -> LayerNorm).

    Args:
        in_ch: Number of raw node feature channels.
        d_model: Hidden width of every layer.
        n_heads: Attention heads per TransformerConv; must divide ``d_model``.
        n_layers: Number of TransformerConv blocks.
        pe_dim: Width of the positional encoding passed to ``forward``.
        dropout: Dropout probability used in the convolutions and after them.

    Raises:
        ValueError: If ``d_model`` is not divisible by ``n_heads``.
    """
    def __init__(self, in_ch, d_model=128, n_heads=4, n_layers=3, pe_dim=16, dropout=0.1):
        super().__init__()
        if d_model % n_heads != 0:
            # Each conv emits n_heads * (d_model // n_heads) channels; the
            # residual sum below only works when that equals d_model.
            raise ValueError(
                f"d_model ({d_model}) must be divisible by n_heads ({n_heads})"
            )
        # +1 accounts for the single-channel degree feature.
        self.lin_in = nn.Linear(in_ch + pe_dim + 1, d_model)
        self.layers = nn.ModuleList([
            TransformerConv(d_model, d_model // n_heads, heads=n_heads, dropout=dropout, beta=True)
            for _ in range(n_layers)
        ])
        self.norms = nn.ModuleList([nn.LayerNorm(d_model) for _ in range(n_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, edge_index, pe, deg_feat):
        """Encode all nodes.

        Args:
            x: Node features, ``(N, in_ch)``.
            edge_index: Edge list passed unchanged to every convolution.
            pe: Positional encodings, ``(N, pe_dim)``.
            deg_feat: Degree feature, ``(N, 1)``.

        Returns:
            Node embeddings of shape ``(N, d_model)``.
        """
        h = torch.cat([x, pe, deg_feat], dim=-1)
        h = self.lin_in(h)
        for conv, ln in zip(self.layers, self.norms):
            h_res = h
            h = conv(h, edge_index)
            h = F.relu(h)
            h = self.dropout(h)
            h = ln(h + h_res)  # post-norm residual connection
        return h  # (N, d_model)


class GraphReadoutGA(nn.Module):
    """Pool node embeddings of a single graph into one vector.

    Uses attention pooling with a linear gate followed by a linear projection.

    Args:
        d_model: Width of the incoming node embeddings.
        out_dim: Width of the returned graph embedding.
    """
    def __init__(self, d_model, out_dim):
        super().__init__()
        self.gate = nn.Sequential(nn.Linear(d_model, 1))
        self.pool = GlobalAttention(self.gate)
        self.proj = nn.Linear(d_model, out_dim)

    def forward(self, H):
        """Return the graph embedding, shape ``(out_dim,)``, for ``H`` of shape ``(N, d_model)``."""
        # All nodes belong to graph 0: the input is treated as one graph.
        batch = torch.zeros(H.size(0), dtype=torch.long, device=H.device)
        g = self.pool(H, batch)          # (1, d_model)
        return self.proj(g).squeeze(0)   # (out_dim,)


# The readout used to be nested inside the encoder class, which made the
# module-level name ``GraphReadoutGA`` undefined. Keep the old attribute path
# working for any code that relied on it.
GraphormerLiteEncoder.GraphReadoutGA = GraphReadoutGA

In [None]:
@dataclass
class Config:
    """Hyper-parameters and runtime settings for the graph encoder."""

    # Seed for the random number generators.
    seed: int = 42
    # Chosen once, when the class is defined: CUDA if available, else CPU.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    # Hidden width of the encoder.
    d_model: int = 128
    # Attention heads per layer.
    n_heads: int = 4
    # Number of encoder layers.
    n_layers: int = 3
    # Width of the positional encoding.
    pe_dim: int = 16
    # Dropout probability.
    dropout: float = 0.1
    # Size of the graph-level embedding.
    g_out: int = 256

cfg = Config()
# Seed from the configuration so that changing cfg.seed actually takes
# effect (the seed used to be hard-coded here).
set_seed(cfg.seed)
device = torch.device(cfg.device)

# ====== LOAD THE .GEXF GRAPH AND BUILD THE PyG Data OBJECT ======
# Path to the input file
gexf_path = "grafo_santiago_filtrado_con_embeddings.gexf"

# Read with NetworkX
Gx = nx.read_gexf(gexf_path)

# If the graph is directed, convert it to undirected so that the degree
# feature and the positional encoding are consistent
if isinstance(Gx, nx.DiGraph):
    Gx = nx.Graph(Gx)  # or Gx.to_undirected()

# Fix a stable node order and map every node id to an index in [0..N-1]
nodes = list(Gx.nodes())
nid = {n: i for i, n in enumerate(nodes)}
N = len(nodes)
# --- Parseo de features por nodo ---
def parse_feature(attrs, n=64):
    """Collect the attributes ``A00``, ``A01``, ... of a node into a vector.

    Args:
        attrs: Mapping holding one entry per key ``A{i:02d}`` for
            ``i`` in ``range(n)``; each value must be convertible by ``float``.
        n: Number of attributes to read.

    Returns:
        Float32 NumPy array of length ``n``.

    Raises:
        KeyError: If one of the expected keys is missing from ``attrs``.
    """
    values = []
    for idx in range(n):
        values.append(float(attrs[f"A{idx:02d}"]))
    return np.array(values, dtype=np.float32)
    
X = np.stack([parse_feature(Gx.nodes[n]) for n in nodes], axis=0)  # (N, d)
d = X.shape[1]

# --- edge_index ---
edges = [(nid[u], nid[v]) for (u, v) in Gx.edges()]
# Guarantee symmetry: append the reverse of every edge that is not already
# present. The membership test uses a set so this is linear in the number of
# edges instead of quadratic.
present = set(edges)
edges = edges + [(j, i) for (i, j) in edges if (j, i) not in present]

# reshape(-1, 2) keeps the result at shape (2, 0) when the graph has no edges.
edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()  # (2, E)

# Build the Data object and move it to the target device
data = Data(x=torch.tensor(X, dtype=torch.float32),
            edge_index=edge_index)
data = data.to(device)

# (Optional) keep the original id of every index
# data.nid_map = nodes  # list with the original node name at position i

# ---- Preparation ----
pe = laplacian_pe(data.edge_index, data.num_nodes, cfg.pe_dim, device)   # (N, pe_dim)
deg_feat = degree_feature(data.edge_index, data.num_nodes, device)       # (N, 1)

encoder = GraphormerLiteEncoder(
    in_ch=data.x.size(1),
    d_model=cfg.d_model,
    n_heads=cfg.n_heads,
    n_layers=cfg.n_layers,
    pe_dim=cfg.pe_dim,
    dropout=cfg.dropout
).to(device)

readout = GraphReadoutGA(d_model=cfg.d_model, out_dim=cfg.g_out).to(device)

# ---- Encode only (no training) ----
# NOTE(review): both modules keep their random initial weights here, so the
# embedding depends on the seed set before they are constructed.
encoder.eval()
readout.eval()
with torch.no_grad():
    H = encoder(data.x, data.edge_index, pe, deg_feat)   # (N, d_model)
    g_emb = readout(H)                                   # (g_out,)

torch.save(g_emb.cpu(), "graph_embedding.pt")
print("Embedding del grafo:", tuple(g_emb.shape), "guardado en graph_embedding.pt")