In [1]:
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric
from torch_geometric.datasets import CoraFull, Planetoid, CitationFull
import torch_geometric.nn as gnn 
import torch_geometric.transforms as T

#from models import GPS
#from utils import test_model

torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cuda', index=0)

In [2]:
transform = T.Compose([T.NormalizeFeatures(), T.AddRandomWalkPE(3, 'RWPE')])
dataset = CitationFull(root='dataset/Cora', name='Cora', transform=transform)

In [3]:
#torch.save(dataset, "/home/sujin/project/DS503/ds503/dataset/Cora/cora/RandomWalkPE.pt")
#dataset = torch.load("/home/sujin/project/DS503/ds503/dataset/Cora/cora/RandomWalkPE.pt")

In [3]:
data = dataset[0]
df = pd.DataFrame(data.x.cpu())
df['y'] = data.y.cpu()
train, valid = train_test_split(df, stratify=df.y, test_size=0.33)
data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
data.train_mask[train.index]=True
data = data.to(device)

In [4]:
import random
import numpy as np

import torch
import torch.nn as nn

import torch_geometric.nn as gnn 

import torch
from torch.nn import Embedding, Linear, ModuleList, ReLU, Sequential
from torch_geometric.nn.conv import GINConv, GPSConv

class GPS(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, pe_dim, number_of_classes, number_of_nodes, num_layers, heads=8, device = 'cuda:0'):
        super().__init__()

        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.pe_dim = pe_dim
        self.number_of_classes = number_of_classes
        self.number_of_nodes = number_of_nodes
        self.num_layers = num_layers
        self.heads=heads
        self.model = self.build_model().to(device)

    def forward(self, x, pe, edge_index):
        x = self.node_emb(x) + self.pe_lin(pe)
        return self.model(x, edge_index)
    
    def build_model(self):
        
        self.node_emb = Linear(self.in_channels, self.hidden_channels).to(device)
        self.pe_lin = Linear(self.pe_dim, self.hidden_channels).to(device)

        layers = []
        for _ in range(self.num_layers):
            net = Sequential(
                Linear(self.hidden_channels, self.hidden_channels),
                ReLU(),
                Linear(self.hidden_channels, self.hidden_channels),
            ).to(device)
            
            conv = GPSConv(channels = self.hidden_channels, conv = GINConv(net), heads=self.heads, attn_dropout=0.5)
            layers.append((conv,'x, edge_index -> x'))

        layers.append((Linear(self.hidden_channels, self.number_of_classes),'x -> x'))
        print(layers)

        return gnn.Sequential('x, edge_index', layers)
    
        
    def get_n_params(self):
        pp=0
        for p in list(self.model.parameters()):
            nn=1
            for s in list(p.size()):
                nn = nn*s
            pp += nn
        return pp

In [7]:
model = GPS(in_channels = data.x.shape[1],\
             hidden_channels=320,\
                pe_dim = 3,\
                  number_of_classes = len(data.y.unique()),\
                    number_of_nodes = data.x.shape[0],\
                        num_layers = 5,\
                          heads=4,\
                              device="cuda:0")
print(model.get_n_params())
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

[(GPSConv(320, conv=GINConv(nn=Sequential(
  (0): Linear(in_features=320, out_features=320, bias=True)
  (1): ReLU()
  (2): Linear(in_features=320, out_features=320, bias=True)
)), heads=4), 'x, edge_index -> x'), (GPSConv(320, conv=GINConv(nn=Sequential(
  (0): Linear(in_features=320, out_features=320, bias=True)
  (1): ReLU()
  (2): Linear(in_features=320, out_features=320, bias=True)
)), heads=4), 'x, edge_index -> x'), (GPSConv(320, conv=GINConv(nn=Sequential(
  (0): Linear(in_features=320, out_features=320, bias=True)
  (1): ReLU()
  (2): Linear(in_features=320, out_features=320, bias=True)
)), heads=4), 'x, edge_index -> x'), (GPSConv(320, conv=GINConv(nn=Sequential(
  (0): Linear(in_features=320, out_features=320, bias=True)
  (1): ReLU()
  (2): Linear(in_features=320, out_features=320, bias=True)
)), heads=4), 'x, edge_index -> x'), (GPSConv(320, conv=GINConv(nn=Sequential(
  (0): Linear(in_features=320, out_features=320, bias=True)
  (1): ReLU()
  (2): Linear(in_features=320, 

In [8]:
def train_model(model, data:torch_geometric.data.data.Data, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.RWPE, data.edge_index)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    pred = out.argmax(dim=-1)
    correct = pred[data.train_mask] == data.y[data.train_mask]
    acc = int(correct.sum()) / int(data.train_mask.sum())
    return loss

In [9]:
for epoch in range(200):
    loss = train_model(model, data, optimizer, criterion)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

RuntimeError: CUDA out of memory. Tried to allocate 5.84 GiB (GPU 0; 23.70 GiB total capacity; 14.33 GiB already allocated; 4.05 GiB free; 18.52 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [12]:
model.eval()
out = model(data.x, data.RWPE, data.edge_index)
pred = out.argmax(dim=-1)
correct = (pred[~data.train_mask] == data.y[~data.train_mask])
acc = int(correct.sum()) / int(correct.shape[0])
f1 = f1_score(data.y[~data.train_mask.cpu()].cpu(), pred[~data.train_mask.cpu()].cpu(), average='micro')
report = pd.DataFrame(classification_report(data.y[~data.train_mask.cpu()].cpu(), pred[~data.train_mask.cpu()].cpu(), output_dict=True))

In [13]:
report.to_csv('./results/gps_coraml.csv')