In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
import sys
sys.path.extend(['data'])
from data import Cora
import dataloader as dl
from model import Sage_En, Sage_Classifier, EdgePredictor
from smote import smote
from train import train_graph, test_graph, train_smote, test_smote, train_smote2, test_smote2

# Set device to GPU if available, else use CPU
device = torch.device("cuda")
print(f"Current device: {torch.cuda.get_device_name(torch.cuda.current_device())}" if torch.cuda.is_available() else "Current device: CPU")
torch.cuda.empty_cache()

Current device: NVIDIA RTX A6000


In [2]:
data_dir = 'data/cora'
data_obj = Cora(data_dir).load_data()
print(data_obj.validate(raise_on_error=True))
print(data_obj['x'], " Size = ", data_obj['x'].size())
print(data_obj.edge_index, " Size = ", data_obj.edge_index.size())
print(data_obj.y, " Size = ", data_obj.y.size())

True
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])  Size =  torch.Size([2708, 1433])
tensor([[   0,    0,    0,  ..., 1874, 1876, 1897],
        [  21,  905,  906,  ..., 2586, 1874, 2707]])  Size =  torch.Size([2, 5429])
tensor([6, 6, 1,  ..., 5, 5, 5])  Size =  torch.Size([2708])


In [3]:
from torch_geometric.utils import to_dense_adj
adj_old = to_dense_adj(data_obj.edge_index)[0]
print(adj_old[:5][:10], adj_old.shape, torch.sum(adj_old))
print(adj_old.dtype)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) torch.Size([2708, 2708]) tensor(5429.)
torch.float32


In [4]:
lr = 0.001
num_epochs = 10
weight_decay = 5e-4
hdim = 64
dropout = 0.1
im_class_num = 3
im_ratio = [0.8, 0.6, 0.4]
class_sample_num = 20
nclass = 7

In [5]:
c_train_num = dl.train_num(data_obj.y, im_class_num, class_sample_num, im_ratio)
print(c_train_num, sum(c_train_num))
train_idx, val_idx, test_idx, c_num_mat = dl.segregate(data_obj.y, c_train_num)
print("train_idx: ", train_idx, len(train_idx))
print("val_idx: ", val_idx, len(val_idx))
print("test_idx: ", test_idx, len(test_idx))
# print(c_num_mat)

[20, 20, 20, 20, 16, 12, 8] 116
0 818
1 217
2 426
3 298
4 351
5 180
6 418
train_idx:  [2000, 765, 1933, 1916, 1831, 865, 1319, 1656, 1282, 1709, 826, 700, 2440, 1, 2481, 541, 1809, 691, 212, 151, 1514, 2416, 1295, 1747, 555, 351, 179, 2041, 332, 1715, 1285, 595, 2454, 1033, 26, 2057, 1983, 556, 1045, 1006, 7, 2658, 581, 2451, 604, 343, 1209, 2499, 1189, 2546, 1834, 1074, 1338, 1279, 2351, 1674, 908, 873, 1464, 2220, 2065, 114, 650, 1270, 2157, 437, 611, 2121, 1079, 58, 1549, 333, 154, 2656, 762, 150, 684, 2160, 676, 1450, 2377, 1732, 998, 1071, 105, 414, 577, 1783, 2500, 1873, 569, 1671, 1000, 1030, 110, 1734, 718, 65, 696, 2322, 1465, 426, 1428, 1814, 844, 1791, 629, 1278, 1351, 228, 1569, 2584, 1620, 1135, 128, 1722] 116
val_idx:  [1544, 2083, 2058, 287, 1891, 1755, 1396, 741, 636, 1680, 1762, 1550, 2246, 168, 1778, 329, 1402, 2507, 358, 829, 2269, 461, 1874, 448, 915, 478, 252, 112, 121, 2637, 2199, 867, 1398, 111, 435, 851, 477, 1236, 1185, 1196, 801, 136, 368, 434, 2095, 428, 432,

In [6]:
train_data = dl.dataloader(data_obj, train_idx)
val_data = dl.dataloader(data_obj, val_idx)
test_data = dl.dataloader(data_obj, test_idx)
print(train_data)
print(val_data)
print(test_data)

Data(x=[116, 1433], edge_index=[2, 11], y=[116])
Data(x=[175, 1433], edge_index=[2, 35], y=[175])
Data(x=[385, 1433], edge_index=[2, 111], y=[385])


In [7]:
encoder = Sage_En(train_data.x.shape[-1], hdim, dropout)
decoder = EdgePredictor(hdim)
#features = encoder(train_data)
classifier = Sage_Classifier(hdim, hdim, nclass, dropout)
#print(features.shape)

In [8]:
#train_idx_en = np.arange(0, features.shape[0])
#new_features, new_labels, new_train_idx = smote(features = features, labels = train_data.y, train_idx = train_idx_en, portion = 0, im_class_num = im_class_num)
#print(new_features.shape, new_labels.shape, new_train_idx)

Training-Testing Part:

In [9]:
# Without smote
# train_graph(train_data, val_data, encoder, classifier, num_epochs = 1000, lr = lr, weight_decay = weight_decay)

In [10]:
# test_graph(test_data, encoder, classifier)

In [11]:
train_smote(train_data, val_data, encoder, classifier, decoder, num_epochs, lr, weight_decay, train_idx, portion = 0, im_class_num = im_class_num)

torch.Size([127, 64])
Epoch [1/10], Loss: 1.9486, Accuracy: 0.1260, Edge Accuracy: 0.5795
Class 0:AUC-ROC- 0.4640, F1 Score- 0.0000; Class 1:AUC-ROC- 0.3958, F1 Score- 0.0000; Class 2:AUC-ROC- 0.4257, F1 Score- 0.0000; Class 3:AUC-ROC- 0.5584, F1 Score- 0.0000; Class 4:AUC-ROC- 0.4324, F1 Score- 0.0000; Class 5:AUC-ROC- 0.5711, F1 Score- 0.0000; Class 6:AUC-ROC- 0.6101, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.4939,Macro-Average F1 Score: 0.0000
torch.Size([175, 64])
Validation Loss: 19.8215, Validation Accuracy: 0.1429, Validation Edge Accuracy: 0.5775
Class 0:AUC-ROC- 0.5811, F1 Score- 0.0000; Class 1:AUC-ROC- 0.4224, F1 Score- 0.0000; Class 2:AUC-ROC- 0.5209, F1 Score- 0.0000; Class 3:AUC-ROC- 0.5061, F1 Score- 0.0000; Class 4:AUC-ROC- 0.4917, F1 Score- 0.0000; Class 5:AUC-ROC- 0.5459, F1 Score- 0.0000; Class 6:AUC-ROC- 0.5469, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.5164,Macro-Average F1 Score: 0.0000
torch.Size([127, 64])
Epoch [2/10], Loss: 1.9409, Accuracy: 0.1260, Edge Acc

In [12]:
test_smote(test_data, encoder, classifier, decoder)

torch.Size([385, 64])
Test Loss: 58.1518, Test Accuracy: 0.1844, Test Edge Accuracy: 0.5638
Class 0:AUC-ROC- 0.7203, F1 Score- 0.0000; Class 1:AUC-ROC- 0.7241, F1 Score- 0.0000; Class 2:AUC-ROC- 0.7318, F1 Score- 0.0000; Class 3:AUC-ROC- 0.7977, F1 Score- 0.0000; Class 4:AUC-ROC- 0.7191, F1 Score- 0.0000; Class 5:AUC-ROC- 0.7320, F1 Score- 0.0000; Class 6:AUC-ROC- 0.7405, F1 Score- 0.0000; Macro-Average AUC-ROC: 0.7379,Macro-Average F1 Score: 0.0000


Training Part B- On the entire data

In [13]:
# train_smote2(data_obj, encoder, classifier, decoder, num_epochs, lr, weight_decay, train_idx, val_idx, portion = 0, im_class_num = im_class_num)

In [14]:
# test_smote2(data_obj, encoder, classifier, decoder, test_idx)