In [None]:
import pandas as pd
import csv
import numpy as np
import pickle as pkl

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    with open(path, mode='rb') as handle:
        return pkl.load(handle)


# Edge-index splits for the positive edges (test / train / validation).
positive_test_edge_indices = _load_pickle('positive_test_edge_indices.pkl')
positive_train_edge_indices = _load_pickle('positive_train_edge_indices.pkl')
positive_validation_edge_indices = _load_pickle('positive_validation_edge_indices.pkl')

# Edge-index splits for the negative edges (test / train / validation).
negative_test_edge_indices = _load_pickle('negative_test_edge_indices.pkl')
negative_train_edge_indices = _load_pickle('negative_train_edge_indices.pkl')
negative_validation_edge_indices = _load_pickle('negative_validation_edge_indices.pkl')

In [None]:
# Load the pickled node embeddings (presumably node2vec output, judging by
# the file name -- confirm against the script that produced it).
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
EMBEDDINGS_PATH = 'node2vec_embeddings_for_matching_lp_4.pkl'
with open(EMBEDDINGS_PATH, mode='rb') as embeddings_file:
    embeddings = pkl.load(embeddings_file)

In [None]:
# Sanity check: show the entry stored at index 0 of the loaded embeddings.
first_embedding = embeddings[0]
print(first_embedding)

In [None]:
# define generate_edge_embeddings function
import torch
def generate_edge_embeddings(h, edges):
    """Build one feature row per edge by concatenating its endpoint embeddings.

    Args:
        h: Node embeddings indexed by node id along the first axis. Either a
            ``torch.Tensor`` or any array-like that ``torch.as_tensor``
            accepts (e.g. a NumPy array or a nested list).
        edges: Indexable pair where ``edges[0]`` holds the source node
            indices and ``edges[1]`` the target node indices, in matching
            order.

    Returns:
        A detached CPU tensor whose i-th row is
        ``[h[edges[0][i]], h[edges[1][i]]]``. For ``h`` of shape
        ``(num_nodes, dim)`` and ``E`` edges the result has shape
        ``(E, 2 * dim)``.
    """
    # torch.cat only accepts tensors, so embeddings stored as a NumPy array or
    # nested list are converted first; tensors are passed through untouched.
    if not torch.is_tensor(h):
        h = torch.as_tensor(h)

    # Extract the source and target node indices from the edges
    src, dst = edges[0], edges[1]

    # Use the node indices to get the corresponding node embeddings
    src_embed = h[src]
    dst_embed = h[dst]

    # Concatenate the source and target node embeddings along the feature axis
    edge_embs = torch.cat([src_embed, dst_embed], dim=1)

    # Drop any autograd history and move to CPU so the features can be handed
    # to non-torch consumers.
    edge_embs = edge_embs.detach().cpu()

    return edge_embs

In [None]:
# Turn node embeddings into edge features for the training split:
# first for the positive training edges, then for the negative ones.
pos_train_edge_embeddings, neg_train_edge_embeddings = (
    generate_edge_embeddings(embeddings, split_edges)
    for split_edges in (positive_train_edge_indices, negative_train_edge_indices)
)

In [None]:
# Stack positive and negative training edges into a single feature matrix and
# build the matching label vector (1 for positive edges, 0 for negative edges).
n_pos_train = pos_train_edge_embeddings.shape[0]
n_neg_train = neg_train_edge_embeddings.shape[0]
train_edge_embeddings = torch.cat(
    (pos_train_edge_embeddings, neg_train_edge_embeddings), dim=0
)
train_edge_labels = torch.cat(
    (torch.ones(n_pos_train), torch.zeros(n_neg_train)), dim=0
)

In [None]:
# Fit a logistic-regression classifier (default hyperparameters) on the
# training edge features and their 0/1 labels.
from sklearn.linear_model import LogisticRegression
untrained_clf = LogisticRegression()
# fit() returns the estimator itself, so clf is the fitted model.
clf = untrained_clf.fit(train_edge_embeddings, train_edge_labels)

In [None]:
# Edge features for the test split: positive edges first, then negative edges.
pos_test_edge_embeddings, neg_test_edge_embeddings = (
    generate_edge_embeddings(embeddings, split_edges)
    for split_edges in (positive_test_edge_indices, negative_test_edge_indices)
)

# Single test feature matrix plus matching labels
# (1 for positive edges, 0 for negative edges).
n_pos_test = pos_test_edge_embeddings.shape[0]
n_neg_test = neg_test_edge_embeddings.shape[0]
test_edge_embeddings = torch.cat(
    (pos_test_edge_embeddings, neg_test_edge_embeddings), dim=0
)
test_edge_labels = torch.cat(
    (torch.ones(n_pos_test), torch.zeros(n_neg_test)), dim=0
)

In [None]:
# Evaluate clf on the test edges: print auc-roc, f1, precision, recall, and accuracy
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, accuracy_score

# Hard 0/1 predictions feed the threshold-dependent metrics below.
y_pred = clf.predict(test_edge_embeddings)

# AUC-ROC measures ranking quality across all thresholds, so it needs
# continuous scores; passing hard labels collapses the curve to one operating
# point. predict_proba columns follow the sorted clf.classes_, so with 0/1
# labels column 1 is the probability of the positive class.
y_score = clf.predict_proba(test_edge_embeddings)[:, 1]

print('auc-roc: ', roc_auc_score(test_edge_labels, y_score))
print('f1: ', f1_score(test_edge_labels, y_pred))
print('precision: ', precision_score(test_edge_labels, y_pred))
print('recall: ', recall_score(test_edge_labels, y_pred))
print('accuracy: ', accuracy_score(test_edge_labels, y_pred))