In [1]:
import os

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid

import graphsage_calculate_embeddings
import test_embeddings

# Read in data

In [2]:
# Load the Cora citation dataset through Planetoid, using the folder below as its root.
cora_root = '/tmp/Cora'
dataset = Planetoid(root=cora_root, name='Cora')

# Only the first entry of the dataset is used; every later cell works on `data`.
data = dataset[0]

# Hyperparameters

In [3]:
# --- Experiment knobs: the values intended to be varied between runs ---
learning_rate = 0.0001  # baseline value used for the recorded run
aggregator = 'mean'
directed_graph = False  # forwarded to the edge / non-edge extraction for link prediction

# --- Reproducibility ---
# The run uses dropout, so results drift from run to run unless the RNG is pinned.
# This seeds PyTorch's global generator only.
# NOTE(review): confirm that the imported training/evaluation modules draw all of their
# randomness from torch; if they also use `random` or NumPy, seed those as well.
seed = 42
torch.manual_seed(seed)

# --- FIXED PARAMS: held constant across experiments ---
epochs = 10
dropout_rate = 0.4
normalization = True
activation_function = F.relu
bias = True
batch_size = 512
neighborhood_1 = 25
neighborhood_2 = 10
embedding_dimension = 128
hidden_layer = 512

# Obtain embedding matrix

In [4]:
# Sizes passed on to the embedding computation:
# the feature count as reported by the data object, and the row count of the feature matrix.
number_features = data.num_features
number_nodes = data.x.shape[0]

In [5]:
# Compute the node embedding matrix with the project's GraphSAGE helper.
# All tunable values come from the hyperparameter cell, so changing an experiment
# only requires editing that cell.
# NOTE(review): with epochs = 10 the recorded progress bar shows 11 iterations
# (epochs 000-010); the loop bound lives in the imported module — confirm whether
# the extra epoch is intended.
embedding_matrix = graphsage_calculate_embeddings.compute_embedding_matrix(
    data = data,
    number_features = number_features,
    number_nodes = number_nodes,
    batch_size = batch_size,
    hidden_layer = hidden_layer,
    epochs = epochs,
    neighborhood_1 = neighborhood_1,
    neighborhood_2 = neighborhood_2,
    embedding_dimension = embedding_dimension,
    learning_rate = learning_rate,
    dropout_rate = dropout_rate,
    activation_function = activation_function,
    aggregator = aggregator,
    # Not exposed in the hyperparameter cell; fixed for every run of this notebook.
    activation_before_normalization = True,
    # Pass the configured value: a literal True here made the `bias` hyperparameter a no-op.
    bias = bias,
    normalize = normalization
)


Training Progress:   9%|▉         | 1/11 [00:01<00:12,  1.29s/it]

Epoch: 000, Total loss: 2.5468, time_taken: 1.290834903717041


Training Progress:  18%|█▊        | 2/11 [00:02<00:10,  1.21s/it]

Epoch: 001, Total loss: 2.5048, time_taken: 1.149693250656128


Training Progress:  27%|██▋       | 3/11 [00:03<00:09,  1.20s/it]

Epoch: 002, Total loss: 2.4936, time_taken: 1.1899290084838867


Training Progress:  36%|███▋      | 4/11 [00:04<00:08,  1.19s/it]

Epoch: 003, Total loss: 2.4811, time_taken: 1.1827359199523926


Training Progress:  45%|████▌     | 5/11 [00:05<00:07,  1.19s/it]

Epoch: 004, Total loss: 2.4417, time_taken: 1.1698400974273682


Training Progress:  55%|█████▍    | 6/11 [00:07<00:05,  1.17s/it]

Epoch: 005, Total loss: 2.3056, time_taken: 1.1503818035125732


Training Progress:  64%|██████▎   | 7/11 [00:08<00:04,  1.15s/it]

Epoch: 006, Total loss: 2.1483, time_taken: 1.108975887298584


Training Progress:  73%|███████▎  | 8/11 [00:09<00:03,  1.15s/it]

Epoch: 007, Total loss: 2.0909, time_taken: 1.1540682315826416


Training Progress:  82%|████████▏ | 9/11 [00:10<00:02,  1.14s/it]

Epoch: 008, Total loss: 2.0542, time_taken: 1.1177711486816406


Training Progress:  91%|█████████ | 10/11 [00:11<00:01,  1.13s/it]

Epoch: 009, Total loss: 2.0564, time_taken: 1.1151878833770752


Training Progress: 100%|██████████| 11/11 [00:12<00:00,  1.16s/it]

Epoch: 010, Total loss: 2.0401, time_taken: 1.0983037948608398
Median time per epoch: 1.1505s





# Save embedding matrix

In [None]:
# Persist the embeddings so they can be reloaded without recomputing them.
# torch.save does not create missing parent folders, so make sure the target
# directory exists first; exist_ok keeps the cell safe to re-run.
os.makedirs('embeddings', exist_ok=True)
torch.save(embedding_matrix, 'embeddings/cora_small.pt')

In [16]:
# How to load it again:
# map_location='cpu' places the tensor on the CPU regardless of the device it was saved
# from; the link-prediction cell later calls .numpy() on it, which needs a CPU tensor.
# NOTE(review): torch.load unpickles the file — only load embedding files you trust.
embedding_matrix = torch.load('embeddings/cora_small.pt', map_location='cpu')
print(embedding_matrix[0])


tensor([-0.0451,  0.1460, -0.0826, -0.0099,  0.0114, -0.1437,  0.0257, -0.1041,
        -0.1011, -0.0123, -0.1976, -0.0826, -0.0536,  0.0027,  0.0145,  0.0394,
         0.0553,  0.0026,  0.0670,  0.0522, -0.0974, -0.0214,  0.0250,  0.1137,
         0.0690,  0.1135, -0.0982,  0.0047, -0.0490,  0.0705, -0.0247,  0.1199,
         0.0261,  0.0567,  0.0634,  0.0701, -0.0677,  0.0688, -0.0173, -0.0420,
         0.1197, -0.0072, -0.1108, -0.1049,  0.0146,  0.1797,  0.1281,  0.0121,
         0.0507,  0.0064, -0.0214, -0.1914,  0.0777, -0.0311,  0.1702,  0.0048,
        -0.1450, -0.1631, -0.0918,  0.0491,  0.0470, -0.0891,  0.0744,  0.0032,
         0.1197, -0.2080,  0.1173,  0.0876,  0.0270,  0.2183, -0.0878,  0.0212,
         0.0602, -0.0598, -0.1275,  0.0259,  0.0396,  0.0113, -0.1043,  0.0663,
        -0.0223, -0.0179,  0.0703,  0.0594, -0.0308,  0.0288, -0.0712,  0.1674,
         0.1075,  0.0526, -0.1342,  0.1112,  0.0426,  0.0493, -0.0336,  0.2211,
        -0.0116, -0.0613,  0.1108,  0.18

# Evaluate node classification 

In [7]:
# Score the embeddings on node classification against data.y
# (presumably the per-node class labels — confirm against test_embeddings).
node_classification_scores = test_embeddings.test_node_classification_one_class(
    embedding_matrix,
    data.y,
)

# The helper's result unpacks into three values: accuracy, macro F1, micro F1.
acc, f1_macro, f1_micro = node_classification_scores

In [8]:
# Report the three node-classification scores as percentages with four decimals.
node_classification_report = f"Accuracy: {acc*100:.4f}, F1_macro: {f1_macro*100:.4f}, F1_micro: {f1_micro*100:.4f}"
print(node_classification_report)


Accuracy: 81.4631, F1_macro: 79.1748, F1_micro: 81.4631


# Evaluate link prediction

In [9]:
# Collect the graph's edges and its non-existing edges for link prediction;
# directed_graph comes from the hyperparameter cell.
edges, non_edges = test_embeddings.get_edges_and_non_edges_as_lists(
    data,
    directed_graph,
)

# Detach the embeddings from autograd and convert them to a NumPy array.
# NOTE(review): no later cell visible here reads embedding_np — the link-prediction
# call receives embedding_matrix itself. Confirm which one the helper expects.
embedding_detached = embedding_matrix.detach()
embedding_np = embedding_detached.numpy()

In [15]:
# Sanity check: display the container type of `edges` (the recorded output is `list`).
type(edges)

list

In [11]:
# Evaluate link prediction on the embeddings using the edge / non-edge lists built above.
# The returned score is reported as ROC AUC by the next cell.
link_prediction_inputs = (embedding_matrix, edges, non_edges)
roc_auc = test_embeddings.test_link_prediction_k_fold_validation(*link_prediction_inputs)


iteration done


In [12]:
# Show the link-prediction score as a percentage (unrounded).
roc_auc_percent = roc_auc*100
print("ROC AUC Score:", roc_auc_percent)


ROC AUC Score: 97.26912787390232
