In [1]:
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data

# Project-local modules live one level up; extend the path before importing them.
sys.path.extend([ '../', '../data'])
from data import Cora
import dataloader as dl
from model import Sage_En, Sage_Classifier, EdgePredictor
from smote import smote
from train import train_graph, test_graph, train_smote, test_smote, train_smote2, test_smote2

# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# (Previously the device was hard-coded to "cuda", which contradicts the
# fallback reported by the print below and breaks on CPU-only machines.)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    print(f"Current device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    # Release cached allocator blocks left over from earlier runs in this kernel.
    torch.cuda.empty_cache()
else:
    print("Current device: CPU")

Current device: NVIDIA A100 80GB PCIe


In [2]:
# Load the Cora dataset from disk and validate it before anything else uses it.
data_dir = '../data/cora'
data_obj = Cora(data_dir).load_data()
print(data_obj.validate(raise_on_error=True))

# Show the x, edge_index and y tensors together with their sizes.
for tensor in (data_obj['x'], data_obj.edge_index, data_obj.y):
    print(tensor, " Size = ", tensor.size())

True
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])  Size =  torch.Size([2708, 1433])
tensor([[   0,    0,    0,  ..., 1874, 1876, 1897],
        [  21,  905,  906,  ..., 2586, 1874, 2707]])  Size =  torch.Size([2, 5429])
tensor([6, 6, 1,  ..., 5, 5, 5])  Size =  torch.Size([2708])


In [3]:
# --- Reproducibility --------------------------------------------------------
# Seed every RNG the pipeline may draw from so that a fresh
# "Restart Kernel -> Run All" reproduces the same initial weights and dropout
# masks (and the same split, if the split helpers use these RNGs).
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- Optimisation -----------------------------------------------------------
lr = 0.001            # learning rate passed to the training routine
num_epochs = 1000     # number of training epochs
weight_decay = 5e-4   # weight-decay value passed to the training routine

# --- Model ------------------------------------------------------------------
hdim = 64             # hidden size handed to the encoder, decoder and classifier
dropout = 0.1         # dropout value handed to the encoder and classifier
nclass = 7            # number of output classes for the classifier

# --- Class imbalance --------------------------------------------------------
# Fed to dl.train_num to derive per-class training-set sizes.
# NOTE(review): with these values the recorded output is
# [20, 20, 20, 20, 10, 10, 10], i.e. the last `im_class_num` classes appear to
# get class_sample_num * ratio samples -- confirm against dl.train_num.
im_class_num = 3
im_ratio = [0.5, 0.5, 0.5]
class_sample_num = 20

In [4]:
# Per-class training-set sizes derived from the imbalance settings, plus their total.
c_train_num = dl.train_num(data_obj.y, im_class_num, class_sample_num, im_ratio)
print(c_train_num, sum(c_train_num))

# Split the node indices into train / validation / test sets.
# c_num_mat is returned as well but is not displayed here.
train_idx, val_idx, test_idx, c_num_mat = dl.segregate(data_obj.y, c_train_num)
for label, indices in (
    ("train_idx: ", train_idx),
    ("val_idx: ", val_idx),
    ("test_idx: ", test_idx),
):
    print(label, indices, len(indices))

[20, 20, 20, 20, 10, 10, 10] 110
0 818
1 217
2 426
3 298
4 351
5 180
6 418
train_idx:  [2196, 96, 792, 1420, 691, 1897, 1595, 1880, 2523, 1386, 2214, 2494, 609, 1809, 2390, 1688, 177, 204, 828, 1365, 754, 1317, 731, 2639, 2607, 1307, 2114, 131, 2406, 2357, 2529, 2580, 2492, 2457, 193, 760, 2541, 730, 1340, 891, 113, 2461, 2362, 2203, 538, 1127, 1774, 843, 2177, 1438, 1932, 69, 147, 1549, 2491, 435, 2572, 1965, 1198, 1379, 87, 1597, 1794, 1282, 2509, 47, 846, 1508, 1226, 1079, 1650, 360, 168, 778, 2460, 438, 1976, 608, 654, 2074, 2254, 116, 1497, 2299, 325, 1986, 2392, 689, 1085, 1592, 34, 478, 139, 202, 1711, 1817, 240, 1999, 374, 1289, 575, 1283, 2536, 1110, 2451, 2646, 2669, 1306, 862, 2380] 110
val_idx:  [1892, 594, 2113, 2030, 2259, 2592, 267, 417, 2700, 1854, 1087, 2146, 2512, 2554, 1088, 2545, 1278, 358, 2557, 1954, 1806, 1347, 1875, 2617, 1955, 1398, 324, 9, 801, 2028, 688, 2308, 1879, 350, 3, 655, 866, 2199, 1450, 297, 421, 410, 23, 599, 1251, 264, 1670, 424, 1121, 507, 574, 21

In [5]:
# Build one data object per split from the full dataset and its index list.
train_data, val_data, test_data = (
    dl.dataloader(data_obj, split_idx)
    for split_idx in (train_idx, val_idx, test_idx)
)

# Print a summary of each split.
for split in (train_data, val_data, test_data):
    print(split)

Data(x=[110, 1433], edge_index=[2, 8], y=[110])
Data(x=[175, 1433], edge_index=[2, 25], y=[175])
Data(x=[385, 1433], edge_index=[2, 119], y=[385])


In [6]:
# Instantiate the three model components. The construction order
# (encoder -> decoder -> classifier) is kept deliberately, since parameter
# initialisation consumes the global RNG in sequence.
encoder = Sage_En(train_data.x.size(-1), hdim, dropout)  # input width = last dim of x
decoder = EdgePredictor(hdim)
classifier = Sage_Classifier(hdim, hdim, nclass, dropout)

In [7]:
# Run training on the full graph object, passing the train/validation index
# lists along with the optimisation settings from the configuration cell.
train_smote2(
    data_obj,
    encoder,
    classifier,
    decoder,
    num_epochs,
    lr,
    weight_decay,
    train_idx,
    val_idx,
    portion=0,
    im_class_num=im_class_num,
    mode="nsm",
)

Epoch [1/1000], Loss: 1.9299, Accuracy: 0.1818, Edge Accuracy: 1.0000
Class 0:AUC-ROC- 0.4839, F1 Score- 0.0000; Class 1:AUC-ROC- 0.4672, F1 Score- 0.0000; Class 2:AUC-ROC- 0.5067, F1 Score- 0.0000; Class 3:AUC-ROC- 0.4006, F1 Score- 0.0000; Class 4:AUC-ROC- 0.4960, F1 Score- 0.0000; Class 5:AUC-ROC- 0.5330, F1 Score- 0.0000; Class 6:AUC-ROC- 0.7160, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5148,Macro-Average F1 Score: 0.0000
torch.Size([2708, 64])
Validation Loss: 1.9445, Validation Accuracy: 0.1429, Validation Edge Accuracy: 1.0000
Class 0:AUC-ROC- 0.5877, F1 Score- 0.0000; Class 1:AUC-ROC- 0.5931, F1 Score- 0.0000; Class 2:AUC-ROC- 0.6698, F1 Score- 0.0000; Class 3:AUC-ROC- 0.6096, F1 Score- 0.0000; Class 4:AUC-ROC- 0.5832, F1 Score- 0.0000; Class 5:AUC-ROC- 0.5876, F1 Score- 0.0000; Class 6:AUC-ROC- 0.7800, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.6301,Macro-Average F1 Score: 0.0000
Epoch [2/1000], Loss: 1.9165, Accuracy: 0.1818, Edge Accuracy: 1.0000
Class 0:AUC-ROC- 0.9017, F

In [8]:
# Evaluate the trained components on the held-out test indices,
# using the same mode as in training.
test_smote2(
    data_obj,
    encoder,
    classifier,
    decoder,
    test_idx,
    mode="nsm",
)

torch.Size([2708, 64])
Test Loss: 1.0904, Test Accuracy: 0.6494, Test Edge Accuracy: 1.0000
Class 0:AUC-ROC- 0.8919, F1 Score- 0.6618; Class 1:AUC-ROC- 0.9641, F1 Score- 0.7379; Class 2:AUC-ROC- 0.9164, F1 Score- 0.6750; Class 3:AUC-ROC- 0.9228, F1 Score- 0.7033; Class 4:AUC-ROC- 0.8678, F1 Score- 0.3100; Class 5:AUC-ROC- 0.9157, F1 Score- 0.4531; Class 6:AUC-ROC- 0.9671, F1 Score- 0.7525; Macro-Average AUC-ROC: 0.9208,Macro-Average F1 Score: 0.6134
