In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid, Flickr
from torch_geometric.loader import GraphSAINTRandomWalkSampler, GraphSAINTNodeSampler

from transformers import GPT2Model, GPT2Tokenizer
from graph_gpt_classification import Graph_GPT_Classification
from baselines import GCN_Classification, GAT_Classification



In [2]:
# dataset = Planetoid(root = '/tmp/Cora', name = 'Cora')
dataset = Flickr(root = './tmp/Flickr')
dataset[0]

Data(x=[89250, 500], edge_index=[2, 899756], y=[89250], train_mask=[89250], val_mask=[89250], test_mask=[89250])

In [3]:
loader = GraphSAINTNodeSampler(
    dataset[0],
    batch_size = 1280, 
    num_steps = 100)

In [50]:
len(loader)

100

In [41]:
for subgraph in loader:
    subgraph

In [42]:
subgraph

Data(num_nodes=711, edge_index=[2, 1816], x=[711, 500], y=[711], train_mask=[711], val_mask=[711], test_mask=[711])

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
data = dataset[0].to(device)
data

In [None]:
gat_model = GAT_Classification(dataset.num_node_features, 
                               100,
                               100,
                               dataset.num_classes).to(device)

optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.001, weight_decay=5e-4)

gat_model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    out = gat_model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

    if (epoch + 1) % 10 == 0:
        t_loss = F.nll_loss(out[data.test_mask], data.y[data.test_mask])
        print(f'epoch: {epoch+1}, training loss: {loss.item()}, testing loss: {t_loss.item()}')
        
        pred = gat_model(data).argmax(dim=1)
        correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
        acc = int(correct) / int(data.test_mask.sum())
        print(f'Accuracy: {acc:.4f}')
    
    loss.backward()
    optimizer.step()

In [4]:
# initilize model
gcn_model = GCN_Classification(dataset.num_node_features, 
                               100,
                               100,
                               dataset.num_classes)

In [7]:
optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.001, weight_decay=5e-4)

gcn_model.train()
for epoch in range(1000):
    for subgraph in loader:
        subgraph.to(device)
        optimizer.zero_grad()
        out = gcn_model(subgraph)
        loss = F.nll_loss(out[subgraph.train_mask], subgraph.y[subgraph.train_mask])
    
        if (epoch + 1) % 10 == 0:
            t_loss = F.nll_loss(out[subgraph.test_mask], subgraph.y[subgraph.test_mask])
            print(f'epoch: {epoch+1}, training loss: {loss.item()}, testing loss: {t_loss.item()}')
            
            pred = gcn_model(subgraph).argmax(dim=1)
            correct = (pred[subgraph.test_mask] == subgraph.y[subgraph.test_mask]).sum()
            acc = int(correct) / int(subgraph.test_mask.sum())
            print(f'Accuracy: {acc:.4f}')
        
        loss.backward()
        optimizer.step()

epoch: 10, training loss: 1.4964473247528076, testing loss: 1.537003517150879
Accuracy: 0.4413
epoch: 10, training loss: 1.4905471801757812, testing loss: 1.603054165840149
Accuracy: 0.4327
epoch: 10, training loss: 1.5389080047607422, testing loss: 1.5880107879638672
Accuracy: 0.4224
epoch: 10, training loss: 1.4966007471084595, testing loss: 1.5964055061340332
Accuracy: 0.4255
epoch: 10, training loss: 1.5071686506271362, testing loss: 1.5903626680374146
Accuracy: 0.4241
epoch: 10, training loss: 1.5266331434249878, testing loss: 1.583277940750122
Accuracy: 0.4204
epoch: 10, training loss: 1.5424693822860718, testing loss: 1.6965174674987793
Accuracy: 0.3949
epoch: 10, training loss: 1.4544286727905273, testing loss: 1.5270622968673706
Accuracy: 0.4519
epoch: 10, training loss: 1.531178593635559, testing loss: 1.6760027408599854
Accuracy: 0.4362
epoch: 10, training loss: 1.476992130279541, testing loss: 1.5219155550003052
Accuracy: 0.4774
epoch: 10, training loss: 1.5462515354156494,

In [11]:
optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.001, weight_decay=5e-4)

gcn_model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    out = gcn_model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

    if (epoch + 1) % 10 == 0:
        t_loss = F.nll_loss(out[data.test_mask], data.y[data.test_mask])
        print(f'epoch: {epoch+1}, training loss: {loss.item()}, testing loss: {t_loss.item()}')
        
        pred = gcn_model(data).argmax(dim=1)
        correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
        acc = int(correct) / int(data.test_mask.sum())
        print(f'Accuracy: {acc:.4f}')
    
    loss.backward()
    optimizer.step()

epoch: 10, training loss: 1.6761001348495483, testing loss: 1.6795542240142822
Accuracy: 0.4032
epoch: 20, training loss: 1.6251262426376343, testing loss: 1.633974552154541
Accuracy: 0.4384
epoch: 30, training loss: 1.6103239059448242, testing loss: 1.621830940246582
Accuracy: 0.4635
epoch: 40, training loss: 1.5913920402526855, testing loss: 1.6060259342193604
Accuracy: 0.4686
epoch: 50, training loss: 1.5840343236923218, testing loss: 1.5941628217697144
Accuracy: 0.4717
epoch: 60, training loss: 1.5681936740875244, testing loss: 1.5830105543136597
Accuracy: 0.4740
epoch: 70, training loss: 1.563368320465088, testing loss: 1.5739176273345947
Accuracy: 0.4814
epoch: 80, training loss: 1.5536601543426514, testing loss: 1.5654451847076416
Accuracy: 0.4786
epoch: 90, training loss: 1.5425270795822144, testing loss: 1.5558490753173828
Accuracy: 0.4764
epoch: 100, training loss: 1.5299633741378784, testing loss: 1.5451009273529053
Accuracy: 0.4794
epoch: 110, training loss: 1.5203063488006

KeyboardInterrupt: 

In [16]:
# initilize model
gpt_model = GPT2Model.from_pretrained('distilgpt2')
graph_gpt_model = Graph_GPT_Classification(gpt_model,
                                           dataset.num_node_features, 
                                           128,
                                           dataset.num_classes)

graph_gpt_model.to(device)

Graph_GPT_Classification(
  (g_conv_1): GCNConv(500, 768)
  (transformer_layers): ModuleList(
    (0-5): 6 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (g_conv_2): GCNConv(768, 7)
)

In [17]:
loader = GraphSAINTNodeSampler(
    dataset[0],
    batch_size = 256, 
    num_steps = 1000)

In [None]:
optimizer = torch.optim.Adam(graph_gpt_model.parameters(), lr=0.001, weight_decay=5e-4)

for epoch in range(10):
    step = 1
    for subgraph in loader:
        step += 1
        subgraph = subgraph.to(device)
        optimizer.zero_grad()
        out = graph_gpt_model(subgraph)
        loss = F.nll_loss(out[subgraph.train_mask], subgraph.y[subgraph.train_mask])

        if (step + 1) % 50 == 0:
            # print(f'step: {step+1}, training loss: {loss.item()}')
            t_loss = F.nll_loss(out[subgraph.test_mask], subgraph.y[subgraph.test_mask])
            print(f'epoch: {epoch + 1}, step: {step+1}, training loss: {loss.item()}, testing loss: {t_loss.item()}')

        if (step + 1) % 50 == 0:
            pred = graph_gpt_model(subgraph).argmax(dim=1)
            correct = (pred[subgraph.test_mask] == subgraph.y[subgraph.test_mask]).sum()
            acc = int(correct) / int(subgraph.test_mask.sum())
            print(f'Accuracy: {acc:.4f}')
            
        loss.backward()
        optimizer.step()

epoch: 1, step: 50, training loss: 6.843184947967529, testing loss: 6.887197971343994
Accuracy: 0.3115
epoch: 1, step: 100, training loss: 3.9646103382110596, testing loss: 4.012615203857422
Accuracy: 0.2881
epoch: 1, step: 150, training loss: 3.2658944129943848, testing loss: 3.02982497215271
Accuracy: 0.2239
epoch: 1, step: 200, training loss: 2.4180073738098145, testing loss: 4.494039058685303
Accuracy: 0.2833
epoch: 1, step: 250, training loss: 1.8815360069274902, testing loss: 2.364426374435425
Accuracy: 0.3478
epoch: 1, step: 300, training loss: 2.1509695053100586, testing loss: 1.7727957963943481
Accuracy: 0.3026
epoch: 1, step: 350, training loss: 2.1587321758270264, testing loss: 2.1124653816223145
Accuracy: 0.3750
epoch: 1, step: 400, training loss: 1.8465936183929443, testing loss: 2.1011505126953125
Accuracy: 0.3115
epoch: 1, step: 450, training loss: 1.6380594968795776, testing loss: 2.1301674842834473
Accuracy: 0.3269
epoch: 1, step: 500, training loss: 2.0073163509368896

In [6]:
graph_gpt_model.train()
for epoch in range(1000):
    optimizer.zero_grad()
    out = graph_gpt_model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

    if (epoch + 1) % 10 == 0:
        t_loss = F.nll_loss(out[data.test_mask], data.y[data.test_mask])
        print(f'epoch: {epoch+1}, training loss: {loss.item()}, testing loss: {t_loss.item()}')
        
        pred = graph_gpt_model(data).argmax(dim=1)
        correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
        acc = int(correct) / int(data.test_mask.sum())
        print(f'Accuracy: {acc:.4f}')
    
    loss.backward()
    optimizer.step()

OutOfMemoryError: CUDA out of memory. Tried to allocate 1.02 GiB. GPU 0 has a total capacty of 31.74 GiB of which 235.38 MiB is free. Including non-PyTorch memory, this process has 31.51 GiB memory in use. Of the allocated memory 29.75 GiB is allocated by PyTorch, and 879.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF