In [14]:
import os

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid

import graphsage_calculate_embeddings
import test_embeddings

# Read in data

In [15]:
# Load the Cora dataset through PyG's Planetoid wrapper.
# NOTE(review): root='/tmp/Cora' is a hard-coded, machine-local path; consider
# a configurable data directory if this notebook is shared.
dataset = Planetoid(root='/tmp/Cora', name='Cora')
# Take the first entry of the dataset as the working graph
# (presumably the only graph in Cora; confirm against the PyG docs).
data = dataset[0]

In [17]:
# Candidate values to vary between experiments.
# NOTE: the single-run configuration cell further down reassigns
# `directed_graph` and every fixed parameter defined here.
learning_rates = [2e-5, 1e-4, 1e-3]  # 1e-4 is the value singled out for experiments
aggregators = ['MeanAggregation']
directed_graph = True

# Fixed parameters: training schedule and regularisation.
epochs = 10
batch_size = 512
dropout_rate = 0.4

# Fixed parameters: layer options.
activation_function = F.relu
normalization = True
bias = True
# When True, the layer applies a linear transformation followed by an activation
# function before aggregation, as described in Eq. 3 of the paper.
project = True

# Fixed parameters: neighbourhood settings and layer widths.
neighborhood_1, neighborhood_2 = 25, 10
embedding_dimension = 128
hidden_layer = 512

# Hyperparameters

In [24]:
# --- Settings that change between experiments ---
learning_rate = 1e-4
# NOTE(review): 'map' matches neither the 'MeanAggregation' name used in the
# sweep cell nor the "mean" option mentioned in the original note; confirm that
# compute_embedding_matrix accepts this value.
aggregator = 'map'
directed_graph = False

# --- Fixed parameters ---
epochs = 10
dropout_rate = 0.4
batch_size = 512

normalization = True
bias = True
activation_function = F.relu

neighborhood_1 = 25
neighborhood_2 = 10
embedding_dimension, hidden_layer = 128, 512

# project=True: the layer applies a linear transformation followed by an
# activation function before aggregation, as described in Eq. 3 of the paper.
project = True

# Obtain embedding matrix

In [10]:
# Input feature width and node count (rows of the feature matrix), both read
# before `data` is rebound below.
number_features = data.num_features
number_nodes = data.x.shape[0]

# Rebind `data` to its sorted form. Per the PyG docs, Data.sort orders
# edge_index (by column when sort_by_row=False) along with the matching edge
# features; verify that compute_embedding_matrix relies on this ordering.
data = data.sort(sort_by_row=False)

In [None]:
# Compute the node embedding matrix with the project helper, driven entirely by
# the values set in the hyperparameter cell above.
embedding_matrix = graphsage_calculate_embeddings.compute_embedding_matrix(
    data=data,
    number_features=number_features,
    number_nodes=number_nodes,
    batch_size=batch_size,
    hidden_layer=hidden_layer,
    epochs=epochs,
    neighborhood_1=neighborhood_1,
    neighborhood_2=neighborhood_2,
    embedding_dimension=embedding_dimension,
    learning_rate=learning_rate,
    dropout_rate=dropout_rate,
    activation_function=activation_function,
    aggregator=aggregator,
    # No configuration variable exists for this flag, so it stays a literal.
    activation_before_normalization=True,
    # Forward the configured value; the call previously hard-coded True, which
    # made the `bias` setting in the hyperparameter cell a no-op.
    bias=bias,
    normalize=normalization,
    project=project,
)


# Save embedding matrix

In [None]:
# torch.save does not create missing parent directories, so create the target
# folder first; otherwise a fresh checkout would fail here and lose the result
# of the training cell.
os.makedirs('embeddings', exist_ok=True)
torch.save(embedding_matrix, 'embeddings/cora_small.pt')

In [None]:
# How to load the saved matrix again (rebinds `embedding_matrix`).
# NOTE(security): torch.load relies on pickle; only load files from a trusted source.
embedding_matrix = torch.load('embeddings/cora_small.pt')

# Evaluate node classification 

In [7]:
# Score the embeddings on node classification against the labels in data.y;
# the helper returns accuracy, macro-F1 and micro-F1, in that order.
acc, f1_macro, f1_micro = test_embeddings.test_node_classification_one_class(embedding_matrix, data.y)

In [8]:
# Report the three scores scaled by 100 (as percentages) with four decimals.
print(f"Accuracy: {acc*100:.4f}, F1_macro: {f1_macro*100:.4f}, F1_micro: {f1_micro*100:.4f}")


Accuracy: 81.4631, F1_macro: 79.1748, F1_micro: 81.4631


# Evaluate link prediction

In [9]:
# Transform embedding matrix into numpy: detach it from the autograd graph
# first, then convert (Tensor.numpy() requires a CPU tensor).
# NOTE(review): `embedding_np` is not passed to the link-prediction call in the
# following cell, which receives `embedding_matrix` instead; confirm which of
# the two is intended.
embedding_detached = embedding_matrix.detach()
embedding_np = embedding_detached.numpy()

# Obtain edges and non-existing edges as lists; `directed_graph` comes from the
# hyperparameter cell.
edges, non_edges = test_embeddings.get_edges_and_non_edges_as_lists(data, directed_graph)

In [11]:
# Score link prediction on the edge / non-edge lists (presumably k-fold
# cross-validated, going by the helper's name). Receives the torch tensor
# `embedding_matrix`, not the numpy copy.
roc_auc = test_embeddings.test_link_prediction_k_fold_validation(embedding_matrix, edges, non_edges)


iteration done


In [12]:
# Show the ROC AUC scaled by 100 (as a percentage), unrounded.
print("ROC AUC Score:", roc_auc*100)


ROC AUC Score: 97.26912787390232
