In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
import sys
sys.path.extend([ '../', '../data'])
from data import Cora
import dataloader as dl
from model import Sage_En, Sage_Classifier, EdgePredictor
from smote import smote
from train import train_graph, test_graph, train_smote, test_smote, train_smote2, test_smote2

# Set device to GPU if available, else use CPU
device = torch.device("cuda")
print(f"Current device: {torch.cuda.get_device_name(torch.cuda.current_device())}" if torch.cuda.is_available() else "Current device: CPU")
torch.cuda.empty_cache()

Current device: NVIDIA RTX A6000


In [2]:
data_dir = '../data/cora'
data_obj = Cora(data_dir).load_data()
print(data_obj.validate(raise_on_error=True))
print(data_obj['x'], " Size = ", data_obj['x'].size())
print(data_obj.edge_index, " Size = ", data_obj.edge_index.size())
print(data_obj.y, " Size = ", data_obj.y.size())

True
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])  Size =  torch.Size([2708, 1433])
tensor([[   0,    0,    0,  ..., 1874, 1876, 1897],
        [  21,  905,  906,  ..., 2586, 1874, 2707]])  Size =  torch.Size([2, 5429])
tensor([6, 6, 1,  ..., 5, 5, 5])  Size =  torch.Size([2708])


In [3]:
from torch_geometric.utils import to_dense_adj
adj_old = to_dense_adj(data_obj.edge_index)[0]
print(adj_old[:5][:10], adj_old.shape, torch.sum(adj_old))
print(adj_old.dtype)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([2708, 2708]) tensor(5429.)
torch.float32


In [4]:
lr = 0.001
num_epochs = 100
weight_decay = 5e-4
hdim = 64
dropout = 0.1
im_class_num = 3
im_ratio = [0.8, 0.6, 0.4]
class_sample_num = 20
nclass = 7

In [5]:
c_train_num = dl.train_num(data_obj.y, im_class_num, class_sample_num, im_ratio)
print(c_train_num, sum(c_train_num))
train_idx, val_idx, test_idx, c_num_mat = dl.segregate(data_obj.y, c_train_num)
print("train_idx: ", train_idx, len(train_idx))
print("val_idx: ", val_idx, len(val_idx))
print("test_idx: ", test_idx, len(test_idx))
# print(c_num_mat)

[20, 20, 20, 20, 16, 12, 8] 116
0 818
1 217
2 426
3 298
4 351
5 180
6 418
train_idx:  [1044, 438, 2164, 136, 1139, 2141, 1399, 249, 1307, 653, 1829, 94, 1805, 1920, 1965, 2344, 2165, 2497, 1269, 2075, 2172, 2015, 1876, 2011, 1492, 658, 2069, 783, 1340, 426, 1516, 2085, 1908, 2126, 1681, 1481, 2028, 2575, 1968, 626, 376, 180, 81, 65, 1740, 1555, 283, 1589, 383, 2128, 450, 1517, 2253, 1416, 1141, 979, 1057, 86, 2635, 1626, 1839, 883, 49, 127, 1907, 2114, 558, 1137, 61, 624, 2367, 1480, 736, 631, 1904, 990, 553, 684, 2521, 1193, 132, 1854, 439, 1035, 2647, 1776, 726, 2474, 1210, 1406, 818, 141, 247, 306, 2138, 2029, 2329, 458, 577, 204, 1318, 1205, 6, 1061, 1932, 2160, 121, 1723, 182, 303, 1598, 2000, 730, 1033, 932, 517] 116
val_idx:  [789, 1758, 339, 300, 2584, 1125, 1389, 2588, 2043, 1869, 2484, 1520, 2500, 794, 679, 1982, 1123, 2166, 1996, 1780, 1336, 741, 2116, 2135, 1437, 378, 1398, 827, 1966, 261, 432, 2006, 705, 2092, 977, 414, 421, 324, 2619, 2317, 111, 801, 252, 2413, 122, 429, 

In [6]:
train_data = dl.dataloader(data_obj, train_idx)
val_data = dl.dataloader(data_obj, val_idx)
test_data = dl.dataloader(data_obj, test_idx)
print(train_data)
print(val_data)
print(test_data)

Data(x=[116, 1433], edge_index=[2, 14], y=[116])
Data(x=[175, 1433], edge_index=[2, 28], y=[175])
Data(x=[385, 1433], edge_index=[2, 122], y=[385])


In [7]:
encoder = Sage_En(train_data.x.shape[-1], hdim, dropout)
decoder = EdgePredictor(hdim)
#features = encoder(train_data)
classifier = Sage_Classifier(hdim, hdim, nclass, dropout)
#print(features.shape)

In [8]:
train_smote(train_data, val_data, encoder, classifier, decoder, num_epochs, lr, weight_decay, train_idx, portion = 0, im_class_num = im_class_num)

torch.Size([127, 64])
Epoch [1/100], Loss: 1.9479, Accuracy: 0.1575, Edge Accuracy: 0.5836
Class 0:AUC-ROC- 0.5523, F1 Score- 0.0000; Class 1:AUC-ROC- 0.5790, F1 Score- 0.0000; Class 2:AUC-ROC- 0.4757, F1 Score- 0.0000; Class 3:AUC-ROC- 0.4963, F1 Score- 0.0000; Class 4:AUC-ROC- 0.5735, F1 Score- 0.0000; Class 5:AUC-ROC- 0.6482, F1 Score- 0.0000; Class 6:AUC-ROC- 0.6244, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5642,Macro-Average F1 Score: 0.0000
torch.Size([175, 64])
Validation Loss: 16.2046, Validation Accuracy: 0.1486, Validation Edge Accuracy: 0.5814
Class 0:AUC-ROC- 0.5459, F1 Score- 0.0000; Class 1:AUC-ROC- 0.5855, F1 Score- 0.0000; Class 2:AUC-ROC- 0.4904, F1 Score- 0.0000; Class 3:AUC-ROC- 0.4967, F1 Score- 0.0000; Class 4:AUC-ROC- 0.3895, F1 Score- 0.0000; Class 5:AUC-ROC- 0.6043, F1 Score- 0.0000; Class 6:AUC-ROC- 0.5548, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5238,Macro-Average F1 Score: 0.0000
torch.Size([127, 64])
Epoch [2/100], Loss: 1.9414, Accuracy: 0.1575, Edge A

In [9]:
test_smote(test_data, encoder, classifier, decoder)

torch.Size([385, 64])
Test Loss: 61.8689, Test Accuracy: 0.3636, Test Edge Accuracy: 0.5250
Class 0:AUC-ROC- 0.7559, F1 Score- 0.0357; Class 1:AUC-ROC- 0.8023, F1 Score- 0.0000; Class 2:AUC-ROC- 0.8452, F1 Score- 0.0000; Class 3:AUC-ROC- 0.8600, F1 Score- 0.0000; Class 4:AUC-ROC- 0.6926, F1 Score- 0.0000; Class 5:AUC-ROC- 0.7310, F1 Score- 0.0000; Class 6:AUC-ROC- 0.7831, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.7815,Macro-Average F1 Score: 0.0051
