In [17]:
import os
import torch
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
%matplotlib inline
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ChebConv
import numpy as np
from torch_geometric.utils import to_dense_adj
import pygsp
import seaborn as sns
import sys
sys.path.append("../")
from src.embedding import EmbHeatGeo
from src.mds import embed_MDS
from scipy.spatial.distance import pdist, squareform

In [18]:
# Load the 'Cora' dataset from the Planetoid collection, using data/Planetoid as the
# root directory and NormalizeFeatures() as the dataset transform.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
# Work with the first graph object of the dataset.
data = dataset[0]

In [19]:
from torch_geometric.nn import ChebConv
class ChebGCN(torch.nn.Module):
    """Two-layer ``ChebConv`` network producing one output vector per node.

    Args:
        hidden_channels: Width of the representation produced by the first layer.
        output_dim: Width of the per-node output of the second layer.
        K: Value forwarded as ``K`` to both ``ChebConv`` layers.
        in_channels: Number of input features per node. When ``None`` (default,
            original behaviour) it is read from the notebook-level
            ``data.num_features`` at construction time. Pass it explicitly to
            avoid depending on whatever ``data`` currently holds, since later
            cells change the feature width.
        apply_softmax: When ``True`` (default, original behaviour) the output
            rows go through a softmax over ``dim=1``. Softmax rows are
            non-negative and sum to one, so the Euclidean distance between any
            two output rows is at most sqrt(2); pass ``False`` when the outputs
            are regressed against targets on a larger scale.
    """

    def __init__(self, hidden_channels, output_dim=20, K=5, in_channels=None, apply_softmax=True):
        super().__init__()
        # Seed before creating the layers so initial weights are reproducible.
        torch.manual_seed(1234567)
        if in_channels is None:
            # Backward-compatible fallback to the notebook-level graph object.
            in_channels = data.num_features
        self.apply_softmax = apply_softmax
        self.conv1 = ChebConv(in_channels, hidden_channels, K=K)
        self.conv2 = ChebConv(hidden_channels, output_dim, K=K)

    def graph_diffusion(self, x, edge_index):
        """Apply the first ChebConv layer followed by a ReLU."""
        x = self.conv1(x, edge_index)
        return x.relu()

    def forward(self, x, edge_index):
        """Return the per-node output for features ``x`` on graph ``edge_index``."""
        x = self.graph_diffusion(x, edge_index)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        if self.apply_softmax:
            x = F.softmax(x, dim=1)
        return x

# Instantiate the Chebyshev network with 16 hidden channels and K=2 for both layers;
# output_dim is left at the class default.
model = ChebGCN(hidden_channels=16,K=2)

In [20]:
# Dense adjacency built from the edge list; squeeze(0) removes the leading dimension
# of the to_dense_adj result.
adj = to_dense_adj(data.edge_index).squeeze(0)
# Wrap the adjacency in a pygsp graph configured with the normalized Laplacian.
graph = pygsp.graphs.Graph(adj.cpu().detach().numpy(), lap_type="normalized")
# NOTE(review): presumably needed before filtering so pygsp knows the spectrum bound —
# confirm against the pygsp documentation.
graph.estimate_lmax()
# Ten diffusion times, geometrically spaced from 0.5 to 100.
tau = np.geomspace(0.5, 100, 10)
n = adj.shape[0]
filt = pygsp.filters.Heat(graph, tau=tau)
# Filter the n x n identity and reshape to (n, n, -1); the last axis is indexed by
# the position in `tau` below.
heat_kernel = filt.filter(np.eye(n), order=30).reshape(
            n, n, -1
        )
# Clamp negative kernel values to zero before taking logarithms.
heat_kernel[heat_kernel < 0] = 0
# Per-scale quantity -4 * tau * log(h + 1e-16); the sqrt is immediately squared, so
# the stored value is the squared form. The 1e-16 keeps the log finite where h == 0.
# NOTE(review): if a kernel entry exceeds 1 the sqrt argument is negative and the
# result is NaN — confirm this cannot occur for this graph.
multi_geo = [
            np.sqrt(-4 * tau[i] * np.log(heat_kernel[:, :, i] + 1e-16)) ** 2
            for i in range(len(tau))
        ]
# Scale weights 1 - tau / sum(tau): larger diffusion times get smaller weights.
weights = 1 - tau / tau.sum()
w_t = weights.sum()
# Normalise the weights to sum to one; weights=None makes np.average unweighted.
weights = weights / w_t if w_t > 0 else None
# Weighted average over the scales, converted to a float32 torch tensor.
dist = torch.from_numpy(np.average(multi_geo, axis=0, weights=weights)).float()

In [51]:
# Train the model so that pairwise distances between its outputs match the
# heat-geodesic target matrix `dist`, using a mean-squared-error objective.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.MSELoss()

def train():
    """Run one optimisation step of the distance-matching objective.

    Reads the notebook-level ``model``, ``optimizer``, ``criterion``, ``data``
    and ``dist``. Pairwise Euclidean distances between the model outputs are
    compared with ``dist`` on (training node, training node) pairs only.

    Returns:
        The loss tensor evaluated before the parameter update.
    """
    model.train()
    optimizer.zero_grad()
    embeddings = model(data.x, data.edge_index)
    pairwise = torch.cdist(embeddings, embeddings)
    # Open-mesh index that picks the train-by-train sub-block of a square matrix.
    mask = data.train_mask
    block = np.ix_(mask, mask)
    predicted, target = pairwise[block], dist[block]
    step_loss = criterion(predicted, target)
    step_loss.backward()
    optimizer.step()
    return step_loss

def train_class_loss():
    """Return the accuracy of the arg-max prediction on the training nodes.

    Despite the name, the returned value is an accuracy (fraction of training
    nodes whose arg-max output index equals the label), not a loss. Reads the
    notebook-level ``model`` and ``data`` and switches ``model`` to eval mode.

    Returns:
        Correct training predictions divided by the number of training nodes.
    """
    model.eval()
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Index of the largest output per node.
    train_mask = data.train_mask
    train_correct = pred[train_mask] == data.y[train_mask]
    return int(train_correct.sum()) / int(train_mask.sum())

for epoch in range(1, 100):
    loss = train()
    if epoch % 5 == 0:
        # Evaluate only on the epochs that are reported. train_class_loss() returns an
        # accuracy, so it is labelled as such (the variable name is kept unchanged).
        loss_class_train = train_class_loss()
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        print(f'Epoch: {epoch:03d}, Train acc: {loss_class_train:.4f}')

Epoch: 005, Loss: 1971548.6250
Epoch: 005, Loss: 0.0786
Epoch: 010, Loss: 1971373.2500
Epoch: 010, Loss: 0.0714
Epoch: 015, Loss: 1971062.1250
Epoch: 015, Loss: 0.0500
Epoch: 020, Loss: 1970699.8750
Epoch: 020, Loss: 0.0429
Epoch: 025, Loss: 1970522.7500
Epoch: 025, Loss: 0.0357
Epoch: 030, Loss: 1970215.2500
Epoch: 030, Loss: 0.0929
Epoch: 035, Loss: 1970089.5000
Epoch: 035, Loss: 0.1143
Epoch: 040, Loss: 1969975.3750
Epoch: 040, Loss: 0.0714
Epoch: 045, Loss: 1969879.6250
Epoch: 045, Loss: 0.0714
Epoch: 050, Loss: 1969788.3750
Epoch: 050, Loss: 0.0786
Epoch: 055, Loss: 1969682.7500
Epoch: 055, Loss: 0.1000
Epoch: 060, Loss: 1969622.6250
Epoch: 060, Loss: 0.0929
Epoch: 065, Loss: 1969571.3750
Epoch: 065, Loss: 0.1143
Epoch: 070, Loss: 1969612.1250
Epoch: 070, Loss: 0.1214
Epoch: 075, Loss: 1969592.1250
Epoch: 075, Loss: 0.1357
Epoch: 080, Loss: 1969546.7500
Epoch: 080, Loss: 0.1214
Epoch: 085, Loss: 1969562.3750
Epoch: 085, Loss: 0.1143
Epoch: 090, Loss: 1969584.1250
Epoch: 090, Loss:

Relative heat-geodesic distance as node feature

In [21]:
# Baseline augmentation: append one all-zero column to the node-feature matrix,
# then record the new feature width on the graph object.
zeros = torch.zeros(data.num_nodes, 1)
data.x = torch.cat((data.x, zeros), dim=1)
data.num_features = data.x.size(1)

In [22]:
class GCN(torch.nn.Module):
    """Two-layer ``GCNConv`` network for node classification.

    The input width comes from the notebook-level ``data.num_features`` and the
    output width from ``dataset.num_classes``, both read at construction time.

    Args:
        hidden_channels: Width of the representation between the two layers.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Seed before creating the layers so initial weights are reproducible.
        torch.manual_seed(1234567)
        n_inputs, n_classes = data.num_features, dataset.num_classes
        self.conv1 = GCNConv(n_inputs, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, n_classes)

    def forward(self, x, edge_index):
        """Return the un-normalised per-node class scores."""
        hidden = torch.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

# Instantiate the classifier with 16 hidden channels. The following cell builds a
# fresh GCN again before training, so this instance is not the one that gets trained.
model = GCN(hidden_channels=16)

In [23]:
# Fresh classifier, Adam optimiser bound to its parameters, and a cross-entropy
# objective for node classification.
model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one optimisation step of the classifier on the training nodes.

    Reads the notebook-level ``model``, ``optimizer``, ``criterion`` and ``data``.

    Returns:
        The loss tensor evaluated before the parameter update.
    """
    model.train()
    optimizer.zero_grad()
    scores = model(data.x, data.edge_index)
    # Only the training nodes contribute to the objective.
    mask = data.train_mask
    batch_loss = criterion(scores[mask], data.y[mask])
    batch_loss.backward()
    optimizer.step()
    return batch_loss

def test():
      model.eval()
      out = model(data.x, data.edge_index)
      pred = out.argmax(dim=1)  # Use the class with highest probability.
      test_correct = pred[data.test_mask] == data.y[data.test_mask]  # Check against ground-truth labels.
      test_acc = int(test_correct.sum()) / int(data.test_mask.sum())  # Derive ratio of correct predictions.
      return test_acc


for epoch in range(1, 501):
    loss = train()
    if epoch % 50 == 0:
        # test() returns an accuracy, so it is labelled as such (the variable name
        # is kept unchanged).
        loss_test = test()
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        print(f'Epoch: {epoch:03d}, Test acc: {loss_test:.4f}')

Epoch: 050, Loss: 1.0662
Epoch: 050, Loss test: 0.7750
Epoch: 100, Loss: 0.5359
Epoch: 100, Loss test: 0.7920
Epoch: 150, Loss: 0.4030
Epoch: 150, Loss test: 0.7970
Epoch: 200, Loss: 0.3381
Epoch: 200, Loss test: 0.7920
Epoch: 250, Loss: 0.2531
Epoch: 250, Loss test: 0.7870
Epoch: 300, Loss: 0.2290
Epoch: 300, Loss test: 0.7900
Epoch: 350, Loss: 0.2317
Epoch: 350, Loss test: 0.7870
Epoch: 400, Loss: 0.2140
Epoch: 400, Loss test: 0.7900
Epoch: 450, Loss: 0.2206
Epoch: 450, Loss test: 0.8020
Epoch: 500, Loss: 0.1972
Epoch: 500, Loss test: 0.8040


In [28]:
# Second experiment: append a relative heat-distance column instead of the zero column.
# Re-fetch the graph from the dataset.
# NOTE(review): presumably this yields the original features without the zero column
# added earlier — confirm.
data = dataset[0]
emb_op = EmbHeatGeo(knn=10)
# Fit the operator on the node features, then run its metric computation with n_tau=2.
# The semantics of both calls are defined in src.embedding and are not visible here.
emb_op.fit(data.x.detach().numpy())
emb_op.metric_computation(data.x.detach().numpy(), n_tau=2)
# Turn the result into a float32 tensor with an extra axis at position 1.
# NOTE(review): assumes get_relative_dist() returns one value per node, giving an
# (N, 1) column — confirm.
relative_dist = torch.from_numpy(emb_op.get_relative_dist()).unsqueeze(1).float()
# Rescale so the largest value is 1.
# NOTE(review): this divides by zero if the maximum is 0.
relative_dist /= relative_dist.max()
# Append the column to the node features and record the new feature width.
data.x = torch.hstack((data.x,relative_dist))
data.num_features = data.x.shape[1]



In [29]:
# Fresh classifier, optimiser and objective for the run on the augmented features.
# GCN reads data.num_features when it is constructed, so it must be built after the
# feature matrix has been extended.
model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()
for epoch in range(1, 501):
    loss = train()
    if epoch % 50 == 0:
        # test() returns an accuracy, so it is labelled as such (the variable name
        # is kept unchanged).
        loss_test = test()
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        print(f'Epoch: {epoch:03d}, Test acc: {loss_test:.4f}')

Epoch: 050, Loss: 1.2026
Epoch: 050, Loss test: 0.7770
Epoch: 100, Loss: 0.5699
Epoch: 100, Loss test: 0.7970
Epoch: 150, Loss: 0.4386
Epoch: 150, Loss test: 0.7970
Epoch: 200, Loss: 0.3837
Epoch: 200, Loss test: 0.7930
Epoch: 250, Loss: 0.3055
Epoch: 250, Loss test: 0.7960
Epoch: 300, Loss: 0.2686
Epoch: 300, Loss test: 0.8050
Epoch: 350, Loss: 0.2344
Epoch: 350, Loss test: 0.7890
Epoch: 400, Loss: 0.2276
Epoch: 400, Loss test: 0.7950
Epoch: 450, Loss: 0.2399
Epoch: 450, Loss test: 0.8060
Epoch: 500, Loss: 0.1981
Epoch: 500, Loss test: 0.7990
