In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
sys.path.insert(0, '/home/chengtao/Workspace/pygcn')
from torchdiffeq import odeint
import random
import numpy as np
import time
import torch.optim as optim
from pygcn.layers import GraphConvolution
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import scipy.sparse as sp
import networkx as nx
from collections import defaultdict

In [None]:
from pygcn.utils import load_data, encode_onehot, normalize, sparse_mx_to_torch_sparse_tensor, accuracy

## Model + Utils Functions

In [None]:
def normalize_adj(adj):
    """Ĩ = D^{-1/2}(A+I)D^{-1/2}"""
    adj = adj + sp.eye(adj.shape[0])
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt)

def row_normalize(features):
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1.).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(features)

def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index

def labels_to_tensor(labels):
    """Convert labels to tensor."""
    labels_int = np.argmax(labels, axis=1)      # shape (N,)
    labels_tensor = torch.LongTensor(labels_int)  # also shape (N,)
    rows, cols = np.where(labels)                 # rows: node indices, cols: class indices
    class_indices  = np.full(labels.shape[0], -1, int)
    class_indices[rows] = cols                       # only labeled rows get 0…C–1, rest stay -1
    labels_tensor   = torch.LongTensor(class_indices)  # shape (N,)
    return labels_tensor

def scipy_to_tensor(x):
    dense = x.toarray()
    features_tensor = torch.from_numpy(dense)
    return features_tensor.float()

def check_zero_row_indices(labels, idx_train, idx_val, idx_test):
    """Check if there are any zero row indices in the labels."""
    zero_row_indices = np.where(~labels.any(axis=1))[0]
    if len(zero_row_indices) > 0:
        print("Zero row indices found in labels:", zero_row_indices)
    else:
        print("No zero row indices found in labels.")
    # Check if any of the zero row indices are in the train, val, or test sets
    train_idx_set = set(idx_train.tolist())
    val_idx_set = set(idx_val.tolist())
    test_idx_set = set(idx_test.tolist())

    if len(train_idx_set.intersection(zero_row_indices)) > 0:
        return f"Train set contains zero row indices, which they are {train_idx_set.intersection(zero_row_indices)}"
    if len(val_idx_set.intersection(zero_row_indices)) > 0:
        return f"Validation set contains zero row indices, which they are {val_idx_set.intersection(zero_row_indices)}"
    if len(test_idx_set.intersection(zero_row_indices)) > 0:
        return f"Test set contains zero row indices, which they are {test_idx_set.intersection(zero_row_indices)}"
    
    return "No zero row indices found in train, val, or test sets."

In [14]:
class EarlyStopping:
    def __init__(self, patience=10, delta=0, verbose=False, trace_func=print, save_path='checkpoint.pth'):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        self.verbose = verbose
        self.trace_func = trace_func
        self.save_path = save_path

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        torch.save(model.state_dict(), self.save_path)
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.best_score:.6f} --> {-val_loss:.6f}).  Saving model ...')

In [None]:
class GCNLayer(torch.nn.Module):
    def __init__(self, in_feats, out_feats, bias=True):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.empty(in_feats, out_feats))
        torch.nn.init.xavier_uniform_(self.weight)
        self.bias = torch.nn.Parameter(torch.zeros(out_feats)) if bias else None

    def forward(self, x, adj):                            
        support = x @ self.weight
        out = torch.sparse.mm(adj, support)
        return out + (self.bias if self.bias is not None else 0.)

class GCN(torch.nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()
        self.gc1 = GCNLayer(nfeat, nhid)
        self.gc2 = GCNLayer(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # input-dropout
        x = torch.nn.functional.relu(self.gc1(x, adj))
        x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # hidden-dropout
        x = self.gc2(x, adj)
        return torch.nn.functional.log_softmax(x, dim=1)


## Load data

In [None]:
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []
dataset_name = 'pubmed'
for i in range(len(names)):
        with open("/home/chengtao/Workspace/Tensorflow_GCN/gcn-master/gcn/data/ind.{}.{}".format(dataset_name, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pickle.load(f, encoding='latin1'))
            else:
                objects.append(pickle.load(f))
                
x, y, tx, ty, allx, ally, graph = tuple(objects)

test_idx_reorder = parse_index_file("/home/chengtao/Workspace/Tensorflow_GCN/gcn-master/gcn/data/ind.{}.test.index".format(dataset_name))
test_idx_range = np.sort(test_idx_reorder)

In [None]:
features = sp.vstack((allx, tx)).tolil()
features[test_idx_reorder, :] = features[test_idx_range, :]
features = row_normalize(features)
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
adj = normalize_adj(adj)

labels = np.vstack((ally, ty))
labels[test_idx_reorder, :] = labels[test_idx_range, :]

idx_test = test_idx_range.tolist()
idx_train = range(len(y))
idx_val = range(len(y), len(y)+500)

In [None]:
features = scipy_to_tensor(features)
adj = scipy_to_tensor(adj)
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)
check_zero_row_indices(labels, idx_train, idx_val, idx_test)
labels = labels_to_tensor(labels)
print('features.shape:', features.shape)
print('adj.shape:', adj.shape)
print('labels.shape:', labels.shape)
print('idx_train.shape:', idx_train.shape)
print('idx_val.shape:', idx_val.shape)
print('idx_test.shape:', idx_test.shape)

## Check Nodes

In [None]:
# np_adj = adj.to_dense().numpy()
# train_adj = np_adj[idx_train] # (60 x 19717) means 60 nodes, each with 19717 features
# val_adj = np_adj[idx_val] # (500 x 19717)
# test_adj = np_adj[idx_test] # (1000 x 19717)
# train_indices = idx_train.tolist()
# val_indices = idx_val.tolist()
# test_indices = idx_test.tolist()

# count_train_children = defaultdict(list)
# train_covered_node_indices = set()

# def check_covered_node_indices_and_neighbors(data_adj, data_indices):
#     count_neighbors = defaultdict(list)
#     covered_node_indices = set()
#     for i in range(len(data_adj)):
#         curr_node = data_adj[i]
#         curr_idx = data_indices[i]
#         for j in range(len(curr_node)):
#             if curr_node[j] != 0:
#                 if i != j:
#                     count_neighbors[curr_idx].append(j)
#                 covered_node_indices.add(j)
#     return covered_node_indices, count_neighbors

In [None]:
# train_covered_nodes, train_neighbors = check_covered_node_indices_and_neighbors(train_adj, train_indices)
# val_covered_nodes, val_neighbors = check_covered_node_indices_and_neighbors(val_adj, val_indices)
# test_covered_nodes, test_neighbors = check_covered_node_indices_and_neighbors(test_adj, test_indices)

In [None]:
# print(f"total number of train_covered_nodes: {len(train_covered_nodes)}")
# print(f'percentage of train_covered_nodes: {(len(train_covered_nodes) / len(adj)) * 100:.2f}%')
# print(f"total number of val_covered_nodes: {len(val_covered_nodes)}")

In [None]:
# untrained_nodes = set(range(len(adj))) - train_covered_nodes

# # count how many untrained nodes appear in validation and testing.
# untrained_nodes_in_val = set()
# untrained_nodes_in_test = set()
# trained_nodes_in_val = set()
# trained_nodes_in_test = set()

# val_covered_node_list = list(val_covered_nodes)
# val_covered_node_list.sort()
# test_covered_node_list = list(test_covered_nodes)
# test_covered_node_list.sort()

# for i in range(len(val_covered_node_list)):
#     if val_covered_node_list[i] in untrained_nodes:
#         untrained_nodes_in_val.add(val_covered_node_list[i])
#     else:
#         trained_nodes_in_val.add(val_covered_node_list[i])

# for i in range(len(test_covered_node_list)):
#     if test_covered_node_list[i] in untrained_nodes:
#         untrained_nodes_in_test.add(test_covered_node_list[i])
#     else:
#         trained_nodes_in_test.add(test_covered_node_list[i])

In [None]:
# # save untrained_nodes to csv
# untrained_nodes_df = pd.DataFrame(list(untrained_nodes), columns=['untrained_nodes'])
# untrained_nodes_df.to_csv('./data/untrained_nodes.csv', index=False)

In [None]:
# print("validation nodes that overlap with training nodes:", len(trained_nodes_in_val))
# print("testing nodes that overlap with training nodes:", len(trained_nodes_in_test))
# print("validation nodes that are untrained:", len(untrained_nodes_in_val))
# print("testing nodes that are untrained:", len(untrained_nodes_in_test))

In [None]:
# train_adj = np.sum(train_adj, axis=0)
# print('train_adj.shape:', train_adj.shape)

# count_zero = 0
# count_nonzero = 0
# uncovered_idnices = []

# for i in range(len(train_adj)):
#     if train_adj[i] == 0:
#         count_zero += 1
#         uncovered_idnices.append(i)
#     else:
#         count_nonzero += 1
# print('count_zero:', count_zero)
# print('count_nonzero:', count_nonzero)

## Run GCN

In [None]:
# ─── Reproducibility ──────────────────────────────────────────────────────────
SEED = 123
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
val_outputs_history = []
# weight_history = []


def train(epoch, model, optimizer, log_freq=10):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    
    output = model(features, adj) # forward pass

    loss_train = F.nll_loss(output[idx_train], labels[idx_train]) # calculate loss 140
    acc_train = accuracy(output[idx_train], labels[idx_train])
    
    loss_train.backward()
    optimizer.step()

    # Validation phase
    model.eval()
    output = model(features, adj)

    val_output = output[idx_val].detach().cpu().numpy()
    val_outputs_history.append(val_output)

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    if (epoch + 1) % log_freq == 0:
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

    # return loss_train, acc_train, loss_val, acc_val

def test(model):
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    # return acc_test, loss_test

model = GCN(nfeat=features.shape[1],
            nhid=16,
            nclass=labels.max().item() + 1,
            dropout=0.5)
optimizer = optim.Adam(model.parameters(),
                       lr=0.01, weight_decay=5e-4)

device = torch.device('cuda' if True and torch.cuda.is_available() else 'cpu')

model = model.to(device)
features = features.to(device)
adj = adj.to(device)
labels = labels.to(device)
idx_train = idx_train.to(device)
idx_val = idx_val.to(device)
idx_test = idx_test.to(device)

In [None]:
model = GCN(nfeat=features.shape[1],
            nhid=16,
            nclass=labels.max().item() + 1,
            dropout=0.5)
optimizer = optim.Adam(model.parameters(),
                       lr=0.01, weight_decay=5e-4)

In [None]:
device = torch.device('cuda' if True and torch.cuda.is_available() else 'cpu')

model = model.to(device)
features = features.to(device)
adj = adj.to(device)
labels = labels.to(device)
idx_train = idx_train.to(device)
idx_val = idx_val.to(device)
idx_test = idx_test.to(device)

In [None]:
for epoch in range(200):
    train(epoch, model, optimizer, log_freq=1)

In [None]:
test(model)

## NEW GCN

In [9]:
# class GCN(torch.nn.Module):
#     def __init__(self, nfeat, nhid, nclass, dropout):
#         super().__init__()
#         self.gc1 = GCNLayer(nfeat, nhid)
#         self.gc2 = GCNLayer(nhid, nclass)
#         self.dropout = dropout

#     def forward(self, x, adj):
#         x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # input-dropout
#         x = torch.nn.functional.relu(self.gc1(x, adj))
#         x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # hidden-dropout
#         x = self.gc2(x, adj)
#         return torch.nn.functional.log_softmax(x, dim=1)

class NewGCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout, input_shape=None, node_masked=None):
        super(NewGCN, self).__init__()
        self.gc1 = GCNLayer(nfeat, nhid)
        self.gc2 = GCNLayer(nhid, nclass)
        self.dropout = dropout
        self.node_masked = node_masked  # list of indices for masked nodes
        
        total_nodes = input_shape[0]
        feature_dim = input_shape[1]

        # Compute unmasked indices: nodes not in node_masked.
        self.unmasked_indices = [i for i in range(total_nodes) if (node_masked is None or i not in node_masked)]
        
        if node_masked is not None:
            # Create trainable weights only for the unmasked nodes.
            init_train_weights = torch.empty(len(self.unmasked_indices), feature_dim)
            nn.init.xavier_uniform_(init_train_weights)
            self.node_train_weights = nn.Parameter(self.min_max_normalize(init_train_weights), requires_grad=True)
            # init_train_weights = torch.ones(len(self.unmasked_indices), feature_dim)
            # self.node_train_weights = nn.Parameter(init_train_weights, requires_grad=True)
            
            # Initialize weights for masked nodes to zero.
            init_node_masked_weights = torch.ones(len(node_masked), feature_dim)
            self.node_masked_weights = nn.Parameter(init_node_masked_weights, requires_grad=False)
        else:
            self.node_weights = nn.Parameter(torch.empty(input_shape), requires_grad=True)
            nn.init.xavier_uniform_(self.node_weights)

    def min_max_normalize(self, x):
        x_min = torch.min(x)
        x_max = torch.max(x)
        return (x - x_min) / (x_max - x_min + 1e-8)

    def forward(self, x, adj):
        if self.node_masked is not None:
            # reconstruct full [num_nodes, feature_dim] weight matrix
            full_w = torch.zeros_like(x, device=x.device)
            um_idx = torch.tensor(self.unmasked_indices, device=x.device)
            index   = um_idx.unsqueeze(1).expand(-1, x.size(1)) 
            full_w  = full_w.scatter(0, index, self.node_train_weights)
            full_w[self.node_masked] = self.node_masked_weights   
        else:
            full_w = self.node_train_weights

        # scale each node’s feature‐vector
        x = x * full_w
        x = torch.nn.functional.dropout(x, self.dropout, training=self.training)
        x = torch.nn.functional.relu(self.gc1(x, adj))
        x = torch.nn.functional.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        return torch.nn.functional.log_softmax(x, dim=1)
    
    #     def forward(self, x, adj):
#         x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # input-dropout
#         x = torch.nn.functional.relu(self.gc1(x, adj))
#         x = torch.nn.functional.dropout(x, self.dropout, training=self.training)  # hidden-dropout
#         x = self.gc2(x, adj)
#         return torch.nn.functional.log_softmax(x, dim=1)

## Environment Set Up

In [12]:
import os
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
features = features.to(device)
adj      = adj.to(device)
labels   = labels.to(device)
idx_train = idx_train.to(device)
idx_val  = idx_val.to(device)
idx_test = idx_test.to(device)

train_uncovered_node_list = pd.read_csv('./data/train_uncovered_node_list.csv')
node_masked = train_uncovered_node_list['untrained_nodes'].tolist()


if not os.path.exists('./saved_weights'):
    os.mkdir('./saved_weights')

phase_one_save_dir = './saved_weights/phase_one'
if not os.path.exists(phase_one_save_dir):
    os.mkdir(phase_one_save_dir)

phase_two_save_dir = './saved_weights/phase_two'
if not os.path.exists(phase_two_save_dir):
    os.mkdir(phase_two_save_dir)


In [16]:
def test(model):
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    # print("Test set results:",
    #       "loss= {:.4f}".format(loss_test.item()),
    #       "accuracy= {:.4f}".format(acc_test.item()))
    return acc_test, loss_test

## Phases

### Phase 1

In [26]:
PHASE_ONE_COUNT    = 3
PHASE_ONE_ACTIVE_EARLY_STOPPING = True

epochs             = 200
node_weights_lr    = 0.0
node_weights_decay = 0.0
gc_layers_lr       = 1e-2
gc_layers_decay    = 1e-4

In [27]:
phase1_test_acc_list = []
phase1_test_loss_list = []

for run in range(PHASE_ONE_COUNT):
    model = NewGCN(
        nfeat   = features.shape[1],
        nhid    = 64,
        nclass  = labels.max().item() + 1,
        dropout = 0.4,
        input_shape = features.shape,
        node_masked = node_masked
    )
    
    model = model.to(device)
    param_groups = [
        {
            'params': [model.node_train_weights],
            'lr': node_weights_lr,
            'weight_decay': node_weights_decay
        },
        {
            'params': [
                p for n,p in model.named_parameters()
                if n != 'node_train_weights'
            ],
            'lr': gc_layers_lr,
            'weight_decay': gc_layers_decay
        }
    ]
    optimizer = optim.Adam(param_groups)
    
    early_stopping = EarlyStopping(
        patience=10, 
        verbose=False, 
        save_path=f"{phase_one_save_dir}/best_model_phase1_({run})run.pth")

    for epoch in range(epochs):
        # ---- train ----
        model.train()
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

        # ---- val ----
        model.eval()
        with torch.no_grad():
            out_val = model(features, adj)
            acc_val = accuracy(out_val[idx_val], labels[idx_val])
            loss_val = F.nll_loss(out_val[idx_val], labels[idx_val])
        
        if PHASE_ONE_ACTIVE_EARLY_STOPPING:
            early_stopping(loss_val, model)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    # Save the model of last epoch
    if not PHASE_ONE_ACTIVE_EARLY_STOPPING:
        torch.save(model.state_dict(), f"{phase_one_save_dir}/last_model_phase1_({run})run.pth")

    # ---- test ----
    acc_test, loss_test = test(model)
    phase1_test_acc_list.append(acc_test.item())
    phase1_test_loss_list.append(loss_test.item())
    print(f"Run {run+1}/{PHASE_ONE_COUNT} - Test Accuracy: {acc_test.item():.4f}, Test Loss: {loss_test.item():.4f}")


std_acc = np.std(phase1_test_acc_list)
mean_acc = np.mean(phase1_test_acc_list)
std_loss = np.std(phase1_test_loss_list)
mean_loss = np.mean(phase1_test_loss_list)
print(f"Mean Test Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")
print(f"Mean Test Loss: {mean_loss:.4f} ± {std_loss:.4f}")

Early stopping triggered at epoch 62
Run 1/3 - Test Accuracy: 0.7740, Test Loss: 0.6020
Early stopping triggered at epoch 56
Run 2/3 - Test Accuracy: 0.7610, Test Loss: 0.5960
Early stopping triggered at epoch 59
Run 3/3 - Test Accuracy: 0.7660, Test Loss: 0.5809
Mean Test Accuracy: 0.7670 ± 0.0054
Mean Test Loss: 0.5930 ± 0.0089


### Phase 2

In [28]:
NODE_WEIGHT_DROP_RATE = 0.0
INIT_VALUE_FOR_ZERO_WEIGHT = 1e-12
PHASE_TWO_COUNT = 3
PHASE_TWO_ACTIVE_EARLY_STOPPING = True

epochs = 200
node_weights_lr     = 0.001
node_weights_decay  = 0.0
gc_layers_lr        = 0.0
gc_layers_lr_decay  = 0.0
phase1_saved_model_name = 'best_model_phase1' if PHASE_ONE_ACTIVE_EARLY_STOPPING else 'last_model_phase1'

In [29]:
phase2_test_acc_list = []
phase2_test_loss_list = []

# replace zero value in the features
features = torch.where(features == 0, torch.tensor(INIT_VALUE_FOR_ZERO_WEIGHT, device=features.device), features)

for run in range(PHASE_TWO_COUNT):

    # ------ Set up model ------
    # 1. instantiate the model
    model = NewGCN(
        nfeat   = features.shape[1],
        nhid    = 64,
        nclass  = labels.max().item() + 1,
        dropout = NODE_WEIGHT_DROP_RATE, # set dropout rate to 0.0
        input_shape = features.shape,
        node_masked = node_masked
    )

    # 2. load the best model from phase 1
    state_dict = torch.load(f"{phase_one_save_dir}/{phase1_saved_model_name}_({run})run.pth")
    model.load_state_dict(state_dict)

    # 4. set model to device
    model = model.to(device)

    # 5. set optimizer
    param_groups = [
        {
            'params': [model.node_train_weights],
            'lr': node_weights_lr,
            'weight_decay': node_weights_decay
        },
        {
            'params': [
                p for n,p in model.named_parameters()
                if n != 'node_train_weights'
            ],
            'lr': gc_layers_lr,
            'weight_decay': gc_layers_decay
        }
    ]
    optimizer = optim.Adam(param_groups)

    early_stopping = EarlyStopping(
        patience=10, 
        verbose=False, 
        save_path=f"{phase_two_save_dir}/best_model_phase1_({run})run.pth")

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        
        loss_train.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            out_val = model(features, adj)
            acc_val = accuracy(out_val[idx_val], labels[idx_val])
            loss_val = F.nll_loss(out_val[idx_val], labels[idx_val])

        if PHASE_TWO_ACTIVE_EARLY_STOPPING:
            early_stopping(loss_val, model)
            if early_stopping.early_stop:
                print(f"Early stopping triggered at epoch {epoch+1}")
                break

    # Save the model of last epoch
    if not PHASE_TWO_ACTIVE_EARLY_STOPPING:
        torch.save(model.state_dict(), f"{phase_two_save_dir}/last_model_phase2_({run})run.pth")

    # ---- test ----
    acc_test, loss_test = test(model)
    phase2_test_acc_list.append(acc_test.item())
    phase2_test_loss_list.append(loss_test.item())
    print(f"Run {run+1}/{PHASE_ONE_COUNT} - Test Accuracy: {acc_test.item():.4f}, Test Loss: {loss_test.item():.4f}")


std_acc = np.std(phase2_test_acc_list)
mean_acc = np.mean(phase2_test_acc_list)
std_loss = np.std(phase2_test_loss_list)
mean_loss = np.mean(phase2_test_loss_list)
print(f"Mean Test Accuracy: {mean_acc:.4f} ± {std_acc:.4f}")
print(f"Mean Test Loss: {mean_loss:.4f} ± {std_loss:.4f}")

  state_dict = torch.load(f"{phase_one_save_dir}/{phase1_saved_model_name}_({run})run.pth")


Early stopping triggered at epoch 11
Run 1/3 - Test Accuracy: 0.7620, Test Loss: 0.5901
Early stopping triggered at epoch 11
Run 2/3 - Test Accuracy: 0.7450, Test Loss: 0.5926
Early stopping triggered at epoch 11
Run 3/3 - Test Accuracy: 0.7580, Test Loss: 0.5777
Mean Test Accuracy: 0.7550 ± 0.0073
Mean Test Loss: 0.5868 ± 0.0065
