In [1]:
import os
import torch
import numpy as np
import networkx as nx
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from sklearn.model_selection import train_test_split
from karateclub import BoostNE
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings("ignore")

In [2]:
class GraphDataset(Dataset):
    def __init__(self, 
                 graph_dir, 
                 graph_files):
        
        self.graph_dir = graph_dir
        self.graph_files = graph_files        
    
    def __len__(self):
        return len(self.graph_files)

    def __getitem__(self, idx):
        
        # single graph
        file = self.graph_files[idx]
        
        # graph paths
        graph_gt_path = os.path.join(self.graph_dir, 'original', file)
        graph_del_path = os.path.join(self.graph_dir, 'deletion', file)
        graph_ins_path = os.path.join(self.graph_dir, 'insertion', file)
        
        # ground truth adj
        graph_gt = torch.from_numpy(nx.to_numpy_array(nx.read_gpickle(graph_gt_path))).float()
        
        # deletion graph embedding
        graph_del = nx.read_gpickle(graph_del_path)
        graph_ins = nx.read_gpickle(graph_ins_path)
        graph_comb = nx.compose(graph_ins, graph_del)
        
        edge_index = np.array(graph_comb.edges()).T
        x = torch.ones(len(graph_comb),1)
        
        return graph_gt, edge_index, x

In [86]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(1, 32)
        self.conv2 = GCNConv(32, 64)

    def forward(self, edge_index, x):

        x = self.conv1(x, edge_index)
        x = F.leaky_relu(x)
        x = self.conv2(x, edge_index)
        z = F.leaky_relu(x)
        
        # reconstruct adj
        A_tild = torch.matmul(z, z.T)
        
        return A_tild

In [87]:
# parameters
lr = 1e-4
epochs = 20
batch_size = 1
emb_model = BoostNE(dimensions=16, iterations=15)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GCN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion= nn.BCEWithLogitsLoss(pos_weight=torch.tensor(10))

# load files
train_files = os.listdir('../graph-data/seattle-graphs/original/')
test_files = os.listdir('../graph-data/west-seattle-graphs/original/')
train_files, val_files = train_test_split(train_files, test_size=0.1, random_state=42)

# make datasets
train_data = GraphDataset('../graph-data/seattle-graphs/', train_files)
val_data = GraphDataset('../graph-data/seattle-graphs/', val_files)
test_data = GraphDataset('../graph-data/west-seattle-graphs/', test_files)

# data loader
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [88]:
losses = []
for i, batch in enumerate(train_dataloader):
    
    # load data
    graph_gt, edge_index, x = batch
    graph_gt = graph_gt.squeeze_(0).to(device)
    labels = graph_gt.flatten()
    edge_index = edge_index.squeeze_(0).to(device)
    x = x.squeeze_(0).to(device)
    
    # make prediction
    optimizer.zero_grad()
    out = model(edge_index, x)
    logits = out.flatten()
    loss = criterion(logits, labels)
    loss.backward() 
    optimizer.step()
    losses.append(loss.item())
    
    if i % 50 == 0: print(loss.item())        

1.7260736227035522
1.3084449768066406
1.1746248006820679
1.1272807121276855
1.9075114727020264
1.1492056846618652
1.5539170503616333
1.2490254640579224
1.1311674118041992
1.1851170063018799
1.2541803121566772
1.0186675786972046
1.431530475616455
1.098233699798584
1.5818767547607422
0.8637858629226685
1.3302481174468994
1.4458212852478027
1.072042465209961
1.075125813484192
1.1262389421463013
1.18251371383667
1.639432430267334
1.363578200340271
1.093266248703003
2.067828893661499
0.9671208262443542
1.2436319589614868
1.0350104570388794
1.5872247219085693
1.1330480575561523
0.9363064169883728
1.1471842527389526
1.3069286346435547
1.0812033414840698
1.4681980609893799
1.1735368967056274
1.219736933708191
1.4247939586639404
1.044346570968628
1.0508898496627808
1.2202072143554688
1.48627507686615
1.029080867767334
1.0763130187988281
1.2169603109359741
1.4324545860290527
1.9777722358703613
1.1364822387695312
1.6229233741760254
1.29798424243927
1.3328421115875244
1.5058040618896484
1.33092212

In [11]:
acc = []
for i, batch in enumerate(val_dataloader):
    
    # load data
    graph_gt, edge_index, x = batch
    graph_gt = graph_gt.squeeze_(0).to(device)
    labels = graph_gt.flatten()
    edge_index = edge_index.squeeze_(0).to(device)
    x = x.squeeze_(0).to(device)
    
    # make prediction
    with torch.no_grad():
        A = model(edge_index, x)
    acc.append(torch.mean(1.0*( 1.0*(A.detach().cpu()>0.5) == graph_gt[0])).item())

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [None]:
acc = []
sig = nn.Sigmoid()
for i, batch in enumerate(val_dataloader):
    
    # load data
    graph_gt, graph_del, graph_ins, index = batch
    graph_del = graph_del.to(device)
    graph_ins = graph_ins.to(device)
    
    # prediction
    with torch.no_grad():
        A = graph_model(graph_del, graph_ins)
    preds = [ sig(A[index[i]:index[i+1], index[i]:index[i+1]].detach().cpu())>0.5 for i in range(len(index)-1)]
    for p in range(len(preds)):
        acc.append(torch.mean(1.0*(preds[p] == graph_gt[p])).item())