In [1]:
import torch
from torch import nn
import os.path as osp
import GCL.losses as L
import GCL.augmentors as A
import torch.nn.functional as F
import torch_geometric.transforms as T

from tqdm import tqdm
from torch.optim import Adam
from GCL.eval import from_predefined_split, LREvaluator
from GCL.models import DualBranchContrast

In [2]:
from torch_geometric.nn import GCN

### import data

In [3]:
import scipy.sparse as sp
import numpy as np
import json

import torch
import torch.nn.functional as F

In [4]:
# load data
adj = sp.load_npz('../CSE881_data_2024/adj.npz')
features  = np.load('../CSE881_data_2024/features.npy')
labels = np.load('../CSE881_data_2024/labels.npy')
splits = json.load(open('../CSE881_data_2024/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

In [5]:
# transfer adjacency matrix into edge index
from torch_geometric.utils import from_scipy_sparse_matrix

edge_index = from_scipy_sparse_matrix(adj)
print("There are", edge_index[0].size(1), "edges in total in the graph\n")

print(torch.unique(edge_index[1]))
print("These edges are not weighted.")

There are 10100 edges in total in the graph

tensor([1.])
These edges are not weighted.


In [6]:
print("There are", len(features), "nodes in the graph.")
num_classes = len(np.unique(labels))
print("Each node can be one of", num_classes, "classes.")
print("Training set size:", len(idx_train))
print("Test set size:", len(idx_test))

There are 2480 nodes in the graph.
Each node can be one of 7 classes.
Training set size: 496
Test set size: 1984


In [7]:
device = 'cuda'

In [8]:
features = torch.from_numpy(features).float()
num_features = len(features[0])
print("Number of features:", num_features)

Number of features: 1390


In [9]:
features = features.to(device)
edge_index = edge_index[0].long().to(device)
edge_weight = edge_index[1].float().to(device)

### Split Valiadation Set

In [10]:
# split 20% of training set as validation set
from sklearn.model_selection import train_test_split

idx_train_sub, idx_val, train_labels_sub, val_labels = train_test_split(
    idx_train, labels, test_size=0.2, random_state=123, stratify=labels)

print("Training subset size:", len(idx_train_sub))
print("Validation set size:", len(idx_val))

Training subset size: 396
Validation set size: 100


In [11]:
train_labels_sub = torch.from_numpy(train_labels_sub).long().to(device)
val_labels = torch.from_numpy(val_labels).long().to(device)
labels = torch.from_numpy(labels).long().to(device)

### main

In [12]:
class Encoder(torch.nn.Module):
    def __init__(self, encoder, augmentor, hidden_dim, proj_dim):
        super(Encoder, self).__init__()
        self.encoder = encoder
        self.augmentor = augmentor

        self.fc1 = torch.nn.Linear(hidden_dim, proj_dim)
        self.fc2 = torch.nn.Linear(proj_dim, hidden_dim)

    def forward(self, x, edge_index, edge_weight=None):
        aug1, aug2 = self.augmentor
        x1, edge_index1, edge_weight1 = aug1(x, edge_index, edge_weight)
        x2, edge_index2, edge_weight2 = aug2(x, edge_index, edge_weight)
        z = self.encoder(x, edge_index, edge_weight)
        z1 = self.encoder(x1, edge_index1, edge_weight1)
        z2 = self.encoder(x2, edge_index2, edge_weight2)
        return z, z1, z2

    def project(self, z: torch.Tensor) -> torch.Tensor:
        z = F.elu(self.fc1(z))
        return self.fc2(z)

In [13]:
def train(encoder_model, contrast_model, optimizer, features=features, 
         edge_index=edge_index, edge_weight=edge_weight, labels=labels):
    
    encoder_model.train()
    optimizer.zero_grad()
    z, z1, z2 = encoder_model(features, edge_index, edge_weight)
    h1, h2 = [encoder_model.project(x) for x in [z1, z2]]
    loss = contrast_model(h1, h2)
    loss.backward()
    optimizer.step()
    return loss.item()    

In [14]:
class LogisticRegression(nn.Module):
    def __init__(self, num_features, num_classes):
        super(LogisticRegression, self).__init__()
        self.fc = nn.Linear(num_features, num_classes)
        torch.nn.init.xavier_uniform_(self.fc.weight.data)


    def forward(self, x):
        x = self.fc(x)
        return x

def test(encoder_model, input_dim, num_class, features=features, edge_index=edge_index, 
         labels=labels, edge_weight=edge_weight):
    
    encoder_model.eval()
    with torch.no_grad():
        z, _, _ = encoder_model(features, edge_index, edge_weight)
        z = z.detach()
    
    classifier = LogisticRegression(input_dim, num_class).to(device)
    optimizer = Adam(classifier.parameters(), lr=0.02)
    criterion = torch.nn.CrossEntropyLoss()
    
    for epoch in range(400):
        classifier.train()
        optimizer.zero_grad()
        logits = classifier(z)  
        loss = criterion(logits[idx_train_sub], train_labels_sub)
        loss.backward()
        optimizer.step()
        
        if epoch % 40 == 0:
            classifier.eval()
            with torch.no_grad():
                logits_val = classifier(z)
                loss_val = criterion(logits_val[idx_val], val_labels)
                preds = logits_val.argmax(dim=1)
                correct = preds[idx_val].eq(val_labels).sum().item()
                accuracy = correct / len(val_labels)
                print(f'Epoch: {epoch}, Train Loss: {loss:.4f}, Val Loss: {loss_val:.4f}, Val Accuracy: {accuracy:.4f}')
        
    classifier.eval()
    with torch.no_grad():
        logits = classifier(z)
        preds = logits.argmax(dim=1)
        correct = preds[idx_val].eq(val_labels).sum().item()
        accuracy = correct / len(val_labels)
        print(f'Final Val Accuracy: {accuracy:.4f}')

    return accuracy

In [15]:
aug1 = A.Compose([A.EdgeRemoving(pe=0.1), A.FeatureMasking(pf=0.1), A.FeatureDropout(pf=0.1), A.NodeDropping(pn=0.2)])
aug2 = A.Compose([A.EdgeRemoving(pe=0.1), A.FeatureMasking(pf=0.1), A.FeatureDropout(pf=0.1), A.NodeDropping(pn=0.2)])

In [16]:
gconv = GCN(in_channels=num_features, hidden_channels=512, 
               out_channels=32, num_layers=3, dropout=0.5, act='relu').to(device)

In [17]:
encoder_model = Encoder(encoder=gconv, augmentor=(aug1, aug2), hidden_dim=32, proj_dim=32).to(device)
contrast_model = DualBranchContrast(loss=L.InfoNCE(tau=0.2), mode='L2L', intraview_negs=True).to(device)

In [18]:
optimizer = Adam(encoder_model.parameters(), lr=0.001)

In [19]:
with tqdm(total=700, desc='(T)') as pbar:
    for epoch in range(1, 701):
        loss = train(encoder_model, contrast_model, optimizer=optimizer)
        pbar.set_postfix({'loss': loss})
        pbar.update()
        if epoch % 100 == 0:
            test(encoder_model, input_dim=32, num_class=7)

(T):  14%|█▍        | 100/700 [00:04<00:24, 24.85it/s, loss=8.25]

Epoch: 0, Train Loss: 3.6392, Val Loss: 3.1457, Val Accuracy: 0.1400
Epoch: 40, Train Loss: 1.1094, Val Loss: 1.0370, Val Accuracy: 0.5800
Epoch: 80, Train Loss: 0.9878, Val Loss: 0.8874, Val Accuracy: 0.6800
Epoch: 120, Train Loss: 0.9348, Val Loss: 0.8379, Val Accuracy: 0.7000
Epoch: 160, Train Loss: 0.9000, Val Loss: 0.8068, Val Accuracy: 0.7000
Epoch: 200, Train Loss: 0.8740, Val Loss: 0.7861, Val Accuracy: 0.7100
Epoch: 240, Train Loss: 0.8533, Val Loss: 0.7709, Val Accuracy: 0.7200
Epoch: 280, Train Loss: 0.8360, Val Loss: 0.7591, Val Accuracy: 0.7300


(T):  14%|█▍        | 101/700 [00:04<00:24, 24.85it/s, loss=8.22]

Epoch: 320, Train Loss: 0.8211, Val Loss: 0.7493, Val Accuracy: 0.7400
Epoch: 360, Train Loss: 0.8080, Val Loss: 0.7410, Val Accuracy: 0.7300
Final Val Accuracy: 0.7400


(T):  28%|██▊       | 199/700 [00:09<00:21, 23.05it/s, loss=7.88]

Epoch: 0, Train Loss: 2.4055, Val Loss: 1.8751, Val Accuracy: 0.4000
Epoch: 40, Train Loss: 0.7872, Val Loss: 0.6966, Val Accuracy: 0.7600
Epoch: 80, Train Loss: 0.7455, Val Loss: 0.6674, Val Accuracy: 0.7800
Epoch: 120, Train Loss: 0.7243, Val Loss: 0.6518, Val Accuracy: 0.7900
Epoch: 160, Train Loss: 0.7093, Val Loss: 0.6397, Val Accuracy: 0.7900
Epoch: 200, Train Loss: 0.6975, Val Loss: 0.6323, Val Accuracy: 0.7900
Epoch: 240, Train Loss: 0.6877, Val Loss: 0.6275, Val Accuracy: 0.8000


(T):  29%|██▊       | 200/700 [00:09<00:21, 23.05it/s, loss=7.92]

Epoch: 280, Train Loss: 0.6793, Val Loss: 0.6243, Val Accuracy: 0.8000
Epoch: 320, Train Loss: 0.6718, Val Loss: 0.6221, Val Accuracy: 0.8100
Epoch: 360, Train Loss: 0.6649, Val Loss: 0.6207, Val Accuracy: 0.8100
Final Val Accuracy: 0.8000


(T):  43%|████▎     | 299/700 [00:13<00:15, 25.78it/s, loss=7.74]

Epoch: 0, Train Loss: 3.8204, Val Loss: 3.3015, Val Accuracy: 0.0300
Epoch: 40, Train Loss: 0.7543, Val Loss: 0.6600, Val Accuracy: 0.8100
Epoch: 80, Train Loss: 0.7059, Val Loss: 0.6508, Val Accuracy: 0.8100
Epoch: 120, Train Loss: 0.6852, Val Loss: 0.6342, Val Accuracy: 0.8100
Epoch: 160, Train Loss: 0.6708, Val Loss: 0.6204, Val Accuracy: 0.8200
Epoch: 200, Train Loss: 0.6591, Val Loss: 0.6100, Val Accuracy: 0.8200
Epoch: 240, Train Loss: 0.6491, Val Loss: 0.6018, Val Accuracy: 0.8200
Epoch: 280, Train Loss: 0.6401, Val Loss: 0.5958, Val Accuracy: 0.8100


(T):  43%|████▎     | 301/700 [00:14<00:37, 10.56it/s, loss=7.79]

Epoch: 320, Train Loss: 0.6320, Val Loss: 0.5916, Val Accuracy: 0.8300
Epoch: 360, Train Loss: 0.6245, Val Loss: 0.5887, Val Accuracy: 0.8100
Final Val Accuracy: 0.8000


(T):  57%|█████▋    | 400/700 [00:18<00:11, 25.86it/s, loss=7.59]

Epoch: 0, Train Loss: 4.4630, Val Loss: 3.7361, Val Accuracy: 0.1200
Epoch: 40, Train Loss: 0.7173, Val Loss: 0.6576, Val Accuracy: 0.7900
Epoch: 80, Train Loss: 0.6685, Val Loss: 0.5916, Val Accuracy: 0.8300
Epoch: 120, Train Loss: 0.6476, Val Loss: 0.5801, Val Accuracy: 0.8400
Epoch: 160, Train Loss: 0.6323, Val Loss: 0.5672, Val Accuracy: 0.8200
Epoch: 200, Train Loss: 0.6197, Val Loss: 0.5570, Val Accuracy: 0.8200
Epoch: 240, Train Loss: 0.6088, Val Loss: 0.5494, Val Accuracy: 0.8200
Epoch: 280, Train Loss: 0.5989, Val Loss: 0.5434, Val Accuracy: 0.8200


(T):  57%|█████▋    | 400/700 [00:18<00:11, 25.86it/s, loss=7.64]

Epoch: 320, Train Loss: 0.5898, Val Loss: 0.5386, Val Accuracy: 0.8200
Epoch: 360, Train Loss: 0.5814, Val Loss: 0.5348, Val Accuracy: 0.8200
Final Val Accuracy: 0.8300


(T):  71%|███████▏  | 499/700 [00:22<00:07, 25.85it/s, loss=7.68]

Epoch: 0, Train Loss: 3.1027, Val Loss: 2.4029, Val Accuracy: 0.1800
Epoch: 40, Train Loss: 0.6852, Val Loss: 0.6130, Val Accuracy: 0.8300
Epoch: 80, Train Loss: 0.6328, Val Loss: 0.5792, Val Accuracy: 0.8400
Epoch: 120, Train Loss: 0.6050, Val Loss: 0.5568, Val Accuracy: 0.8500
Epoch: 160, Train Loss: 0.5834, Val Loss: 0.5458, Val Accuracy: 0.8500
Epoch: 200, Train Loss: 0.5663, Val Loss: 0.5392, Val Accuracy: 0.8500
Epoch: 240, Train Loss: 0.5523, Val Loss: 0.5354, Val Accuracy: 0.8400
Epoch: 280, Train Loss: 0.5405, Val Loss: 0.5331, Val Accuracy: 0.8300


(T):  72%|███████▏  | 502/700 [00:23<00:18, 10.84it/s, loss=7.64]

Epoch: 320, Train Loss: 0.5304, Val Loss: 0.5317, Val Accuracy: 0.8200
Epoch: 360, Train Loss: 0.5216, Val Loss: 0.5308, Val Accuracy: 0.8100
Final Val Accuracy: 0.8100


(T):  86%|████████▌ | 599/700 [00:27<00:03, 25.57it/s, loss=7.59]

Epoch: 0, Train Loss: 4.0784, Val Loss: 3.3886, Val Accuracy: 0.1300
Epoch: 40, Train Loss: 0.6916, Val Loss: 0.6294, Val Accuracy: 0.8200
Epoch: 80, Train Loss: 0.6298, Val Loss: 0.5898, Val Accuracy: 0.8300
Epoch: 120, Train Loss: 0.6036, Val Loss: 0.5789, Val Accuracy: 0.8400
Epoch: 160, Train Loss: 0.5844, Val Loss: 0.5684, Val Accuracy: 0.8500
Epoch: 200, Train Loss: 0.5686, Val Loss: 0.5602, Val Accuracy: 0.8400
Epoch: 240, Train Loss: 0.5553, Val Loss: 0.5541, Val Accuracy: 0.8400
Epoch: 280, Train Loss: 0.5437, Val Loss: 0.5497, Val Accuracy: 0.8400
Epoch: 320, Train Loss: 0.5337, Val Loss: 0.5466, Val Accuracy: 0.8400


(T):  86%|████████▌ | 602/700 [00:27<00:09, 10.64it/s, loss=7.65]

Epoch: 360, Train Loss: 0.5247, Val Loss: 0.5446, Val Accuracy: 0.8400
Final Val Accuracy: 0.8400


(T): 100%|██████████| 700/700 [00:31<00:00, 25.59it/s, loss=7.56]

Epoch: 0, Train Loss: 2.7749, Val Loss: 2.6986, Val Accuracy: 0.3500
Epoch: 40, Train Loss: 0.6487, Val Loss: 0.5999, Val Accuracy: 0.8300
Epoch: 80, Train Loss: 0.5909, Val Loss: 0.5618, Val Accuracy: 0.8600
Epoch: 120, Train Loss: 0.5605, Val Loss: 0.5478, Val Accuracy: 0.8500
Epoch: 160, Train Loss: 0.5386, Val Loss: 0.5404, Val Accuracy: 0.8500
Epoch: 200, Train Loss: 0.5218, Val Loss: 0.5373, Val Accuracy: 0.8500
Epoch: 240, Train Loss: 0.5084, Val Loss: 0.5373, Val Accuracy: 0.8600
Epoch: 280, Train Loss: 0.4972, Val Loss: 0.5391, Val Accuracy: 0.8400
Epoch: 320, Train Loss: 0.4876, Val Loss: 0.5418, Val Accuracy: 0.8300


(T): 100%|██████████| 700/700 [00:32<00:00, 21.69it/s, loss=7.56]

Epoch: 360, Train Loss: 0.4793, Val Loss: 0.5449, Val Accuracy: 0.8300
Final Val Accuracy: 0.8300



