In [1]:
import os
import time
import traceback

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid

import graphsage_calculate_embeddings
import test_embeddings


# Read in data

In [2]:
# Load the PubMed dataset through torch_geometric's Planetoid loader; files are
# stored under /tmp/PubMed.
dataset = Planetoid(
    name='PubMed',
    root='/tmp/PubMed',
)
# Work with the first (index 0) entry of the dataset from here on.
data = dataset[0]

# Hyperparameters

ValueError: Could not resolve 'map' among choices {'MLPAggregation', 'StdAggregation', 'DeepSetsAggregation', 'EquilibriumAggregation', 'VarAggregation', 'AttentionalAggregation', 'SumAggregation', 'MinAggregation', 'GRUAggregation', 'MaxAggregation', 'MedianAggregation', 'GraphMultisetTransformer', 'Set2Set', 'SoftmaxAggregation', 'DegreeScalerAggregation', 'SetTransformerAggregation', 'MeanAggregation', 'LCMAggregation', 'QuantileAggregation', 'PowerMeanAggregation', 'MultiAggregation', 'Aggregation', 'LSTMAggregation', 'SortAggregation', 'add', 'MulAggregation'}

In [3]:
# ---------------------------------------------------------------------------
# Experiment grid: one run is trained per combination of the three lists below.
# ---------------------------------------------------------------------------
# Learning rates to sweep (0.0001 is another value worth trying).
learning_rates = [0.001]
# Aggregator names to sweep.
aggregators = ['MeanAggregation', 'MaxAggregation', 'LSTMAggregation']
# Whether the layer applies a linear transformation followed by an activation
# function before aggregation, as described in Eq. 3 of the paper.
projects = [True, False]

# Passed to the edge / non-edge extraction used for link prediction.
directed_graph = True

# ---------------------------------------------------------------------------
# Fixed parameters: identical for every run in the grid.
# ---------------------------------------------------------------------------
epochs = 10
batch_size = 512
dropout_rate = 0.4
bias = True
normalization = True
activation_function = F.relu

# Neighborhood sizes (first / second), then the model widths.
neighborhood_1, neighborhood_2 = 25, 10
hidden_layer, embedding_dimension = 512, 128

# Obtain embedding matrix

In [None]:
# Feature count comes from the dataset object; node count is the number of rows
# in the feature matrix data.x.
number_features = data.num_features
number_nodes = data.x.shape[0]

# Rebind `data` to its sorted version.
# NOTE(review): presumably sort_by_row=False orders the edges by their second
# (column) index - confirm against the torch_geometric documentation.
data = data.sort(sort_by_row=False)

In [None]:
# Train one embedding matrix per (learning rate, aggregator, project)
# combination and persist each one to disk.
#
# results            - one record per run that trained AND was saved.
# broken_experiments - one record per run that raised, with the traceback.
#
# After the loop, `embedding_matrix` holds the matrix of the last run whose
# training call returned; failed runs never overwrite it.
results = []
broken_experiments = []

# torch.save does not create missing parent directories, so make sure the
# output folder exists before any (potentially long) training run starts.
output_dir = "embeddings/pubmed"
os.makedirs(output_dir, exist_ok=True)

for lr in learning_rates:
    for aggregator in aggregators:
        for project in projects:
            # Started outside the try block so the except branch can always
            # compute an elapsed time.
            start_time = time.time()
            try:
                # Compute the embedding matrix for the current set of hyperparameters
                embedding_matrix = graphsage_calculate_embeddings.compute_embedding_matrix(
                    data=data,
                    number_features=number_features,
                    number_nodes=number_nodes,
                    batch_size=batch_size,
                    hidden_layer=hidden_layer,
                    epochs=epochs,
                    neighborhood_1=neighborhood_1,
                    neighborhood_2=neighborhood_2,
                    embedding_dimension=embedding_dimension,
                    learning_rate=lr,
                    dropout_rate=dropout_rate,
                    activation_function=activation_function,
                    aggregator=aggregator,
                    activation_before_normalization=True,
                    bias=bias,  # use the configured value instead of a literal
                    normalize=normalization,
                    project=project,
                )
                # Measure training time only, before the file is written.
                elapsed = time.time() - start_time
                torch.save(embedding_matrix, f"{output_dir}/{lr}_{aggregator}_{project}_.pt")
            except Exception as e:
                # Do NOT read `embedding_matrix` here: it is either unbound
                # (first run failed) or left over from an earlier run.
                print(f"Experiment failed (lr={lr}, aggregator={aggregator}, project={project}): {e!r}")
                broken_experiments.append({
                    'learning_rate': lr,
                    'aggregator': aggregator,
                    'project': project,
                    'embedding_matrix': None,
                    'time': time.time() - start_time,
                    'error': traceback.format_exc(),
                })
            else:
                # Store the embedding matrix and corresponding hyperparameters.
                # Recorded only after a successful save, so a run can never
                # appear in both lists.
                results.append({
                    'learning_rate': lr,
                    'aggregator': aggregator,
                    'project': project,
                    'embedding_matrix': embedding_matrix,
                    'time': elapsed,
                })


# Save embedding matrix

In [None]:
# Persist whatever `embedding_matrix` currently holds (the value left over from
# the experiment loop above).
# NOTE(review): the file name says "cora" although this notebook loads PubMed -
# confirm the intended target path before relying on this file.
torch.save(obj=embedding_matrix, f='embeddings/cora_small.pt')  # TODO

# Evaluate node classification 

In [None]:
# Score the current embedding matrix on node classification, using data.y as
# the labels; the helper returns accuracy, macro-F1 and micro-F1 in that order.
acc, f1_macro, f1_micro = test_embeddings.test_node_classification_one_class(
    embedding_matrix,
    data.y,
)

In [None]:
# Report the three node-classification scores, scaled by 100 and shown with
# four decimals.
print(
    f"Accuracy: {acc*100:.4f}, "
    f"F1_macro: {f1_macro*100:.4f}, "
    f"F1_micro: {f1_micro*100:.4f}"
)


# Evaluate link prediction

In [None]:
# Transform the embedding matrix into a NumPy array.
# detach() drops the autograd history. cpu() is required because
# Tensor.numpy() raises for tensors that do not live in host memory; it is a
# no-op when the tensor is already on the CPU.
embedding_detached = embedding_matrix.detach()
embedding_np = embedding_detached.cpu().numpy()

# Obtain edges and non-existing edges as lists
edges, non_edges = test_embeddings.get_edges_and_non_edges_as_lists(data, directed_graph)

In [None]:
# Debug check: display the container type returned for `edges`.
type(edges)

In [None]:
# Evaluate link prediction on the edge / non-edge lists built above.
# NOTE(review): the tensor `embedding_matrix` is passed here, not the NumPy copy
# `embedding_np` created in the previous cell - confirm which one the helper
# expects.
roc_auc = test_embeddings.test_link_prediction_k_fold_validation(
    embedding_matrix,
    edges,
    non_edges,
)


In [None]:
# Report the link-prediction ROC AUC scaled by 100.
roc_auc_percent = roc_auc*100
print("ROC AUC Score:", roc_auc_percent)
