In [1]:
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data

# Project-local modules live one directory up; extend the path before importing them.
sys.path.extend([ '../', '../data'])
from data import Cora
import dataloader as dl
from model import Sage_En, Sage_Classifier, EdgePredictor
from smote import smote
from train import train_graph, test_graph, train_smote, test_smote, train_smote2, test_smote2

# Select the GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs on machines without a GPU. (Previously `device` was
# unconditionally "cuda", contradicting the CPU fallback reported by the print.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Current device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    # Release cached allocator blocks left over from earlier runs in this kernel.
    torch.cuda.empty_cache()
else:
    print("Current device: CPU")

Current device: NVIDIA RTX A6000


In [2]:
# Load the Cora dataset from disk and confirm the returned graph object
# passes its own validation (raises instead of returning False on failure).
data_dir = '../data/cora'
data_obj = Cora(data_dir).load_data()
print(data_obj.validate(raise_on_error=True))

# Show each stored tensor followed by its size, in the order x, edge_index, y.
for field in (data_obj['x'], data_obj.edge_index, data_obj.y):
    print(field, " Size = ", field.size())

True
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])  Size =  torch.Size([2708, 1433])
tensor([[   0,    0,    0,  ..., 1874, 1876, 1897],
        [  21,  905,  906,  ..., 2586, 1874, 2707]])  Size =  torch.Size([2, 5429])
tensor([6, 6, 1,  ..., 5, 5, 5])  Size =  torch.Size([2708])


In [3]:
from torch_geometric.utils import to_dense_adj

# Dense adjacency matrix of the full graph. `to_dense_adj` returns a batched
# tensor, so index 0 selects the single graph. `max_num_nodes` pins the matrix
# to one row/column per feature row, so trailing isolated nodes (which never
# appear in `edge_index`) cannot silently shrink it.
adj_old = to_dense_adj(data_obj.edge_index, max_num_nodes=data_obj['x'].size(0))[0]

# Preview the top-left 5x10 corner. The previous `adj_old[:5][:10]` sliced the
# row axis twice and therefore printed 5 complete rows instead of a 5x10 block.
print(adj_old[:5, :10], adj_old.shape, torch.sum(adj_old))
print(adj_old.dtype)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([2708, 2708]) tensor(5429.)
torch.float32


In [4]:
# Reproducibility: seed every RNG the pipeline may draw from (Python, NumPy,
# PyTorch) so that Restart & Run All yields the same split, initial weights and
# dropout masks. NOTE(review): which RNG the project helpers actually use is not
# visible from this notebook, hence all three are seeded.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Optimisation settings.
lr = 0.001
num_epochs = 100
weight_decay = 5e-4

# Model settings: hidden width and dropout passed to the encoder/classifier,
# and the number of target classes.
hdim = 64
dropout = 0.1
nclass = 7

# Class-imbalance settings: `class_sample_num` training samples per class, with
# `im_class_num` classes down-sampled by the matching entry of `im_ratio`.
# NOTE(review): exact semantics live in `dl.train_num` — confirm there.
im_class_num = 3
im_ratio = [0.8, 0.6, 0.4]
class_sample_num = 20
assert len(im_ratio) == im_class_num, "im_ratio needs one entry per imbalanced class"

In [5]:
# Number of training samples to draw per class, printed together with the total.
c_train_num = dl.train_num(data_obj.y, im_class_num, class_sample_num, im_ratio)
print(c_train_num, sum(c_train_num))

# Partition the node indices into train / validation / test sets.
# `c_num_mat` is returned alongside the splits but is not displayed here.
train_idx, val_idx, test_idx, c_num_mat = dl.segregate(data_obj.y, c_train_num)
for split_label, split_idx in (
    ("train_idx: ", train_idx),
    ("val_idx: ", val_idx),
    ("test_idx: ", test_idx),
):
    print(split_label, split_idx, len(split_idx))

[20, 20, 20, 20, 16, 12, 8] 116
0 818
1 217
2 426
3 298
4 351
5 180
6 418
train_idx:  [634, 1353, 2262, 2154, 2312, 2634, 1126, 1115, 1376, 57, 1116, 1068, 2152, 1002, 1768, 2670, 155, 1984, 795, 436, 1632, 2208, 1453, 1428, 2166, 1514, 1878, 1031, 1191, 779, 212, 1681, 614, 1152, 1039, 1839, 940, 2075, 728, 2023, 329, 846, 542, 336, 1876, 79, 1400, 2264, 1931, 2278, 2149, 591, 1434, 1257, 314, 933, 466, 1743, 2390, 114, 2286, 1285, 517, 1682, 776, 89, 2454, 2127, 2223, 667, 688, 1417, 2464, 2617, 532, 1903, 2136, 1607, 1652, 958, 1196, 289, 489, 2517, 1416, 702, 918, 961, 935, 905, 2703, 264, 374, 80, 917, 2687, 1664, 1496, 527, 922, 1994, 2275, 1851, 2418, 2507, 878, 2017, 2183, 363, 216, 1238, 274, 1717, 147, 2514, 2231] 116
val_idx:  [1445, 1405, 1005, 1983, 1960, 1004, 2257, 470, 635, 2168, 2356, 1000, 2557, 1399, 565, 1245, 563, 786, 449, 1865, 2279, 2395, 1894, 1307, 56, 308, 2413, 827, 1669, 2529, 113, 1879, 721, 301, 115, 2121, 248, 2025, 427, 26, 121, 1185, 1251, 262, 2308, 5

In [6]:
# Build one data object per split from the full graph and that split's node
# indices, in train / val / test order.
train_data, val_data, test_data = (
    dl.dataloader(data_obj, split_idx)
    for split_idx in (train_idx, val_idx, test_idx)
)

# One summary line per split.
print(train_data, val_data, test_data, sep="\n")

Data(x=[116, 1433], edge_index=[2, 13], y=[116])
Data(x=[175, 1433], edge_index=[2, 35], y=[175])
Data(x=[385, 1433], edge_index=[2, 121], y=[385])


In [7]:
# Width of the last axis of the training feature matrix; used as the encoder's
# first argument.
in_dim = train_data.x.shape[-1]

# Instantiate the three model components. Construction order is kept as
# encoder -> decoder -> classifier so weight initialisation consumes the RNG in
# the same sequence.
encoder = Sage_En(in_dim, hdim, dropout)
decoder = EdgePredictor(hdim)
classifier = Sage_Classifier(hdim, hdim, nclass, dropout)

In [8]:
# Baseline run without SMOTE: fit the encoder and classifier on the training
# split, reporting metrics on the validation split.
# NOTE(review): the epoch count is hard-coded to 1000 here instead of using the
# `num_epochs` config value (100) — confirm which one is intended.
train_graph(
    train_data,
    val_data,
    encoder,
    classifier,
    num_epochs=1000,
    lr=lr,
    weight_decay=weight_decay,
)

Epoch [1/1000], Loss: 1.9513, Accuracy: 0.1552
Class 0:AUC-ROC- 0.3750, F1 Score- 0.0000; Class 1:AUC-ROC- 0.4484, F1 Score- 0.0000; Class 2:AUC-ROC- 0.5401, F1 Score- 0.0000; Class 3:AUC-ROC- 0.5518, F1 Score- 0.0000; Class 4:AUC-ROC- 0.4934, F1 Score- 0.0000; Class 5:AUC-ROC- 0.6150, F1 Score- 0.0000; Class 6:AUC-ROC- 0.4965, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5029,Macro-Average F1 Score: 0.0000
Validation Loss: 1.9481, Validation Accuracy: 0.1371
Class 0:AUC-ROC- 0.5504, F1 Score- 0.0000; Class 1:AUC-ROC- 0.4317, F1 Score- 0.0000; Class 2:AUC-ROC- 0.5059, F1 Score- 0.0000; Class 3:AUC-ROC- 0.5169, F1 Score- 0.0000; Class 4:AUC-ROC- 0.4885, F1 Score- 0.0000; Class 5:AUC-ROC- 0.5767, F1 Score- 0.0000; Class 6:AUC-ROC- 0.4417, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5017,Macro-Average F1 Score: 0.0000
Epoch [2/1000], Loss: 1.9439, Accuracy: 0.1983
Class 0:AUC-ROC- 0.7112, F1 Score- 0.0000; Class 1:AUC-ROC- 0.7193, F1 Score- 0.0000; Class 2:AUC-ROC- 0.8109, F1 Score- 0.0000; 

In [9]:
# Evaluate the baseline (no SMOTE) encoder/classifier pair on the test split.
test_graph(test_data, encoder, classifier)

Test Loss: 1.3890, Test Accuracy: 0.5481
Class 0:AUC-ROC- 0.8561, F1 Score- 0.4490; Class 1:AUC-ROC- 0.8735, F1 Score- 0.0000; Class 2:AUC-ROC- 0.8672, F1 Score- 0.0000; Class 3:AUC-ROC- 0.8297, F1 Score- 0.0000; Class 4:AUC-ROC- 0.7976, F1 Score- 0.0000; Class 5:AUC-ROC- 0.7703, F1 Score- 0.0000; Class 6:AUC-ROC- 0.8803, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.8392,Macro-Average F1 Score: 0.0641
