In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so edited
# modules are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
# NOTE(review): hard-coded, machine-specific site-packages path. Presumably
# graph_tool is installed outside this kernel's environment and this makes it
# importable — confirm, and prefer fixing the kernel environment instead.
sys.path.append("/usr/local/lib/python3.7/site-packages")
import graph_tool as gt
import graph_tool.all as gtl

In [None]:
from IPython.display import set_matplotlib_formats
# Request the 'retina' figure format for inline matplotlib output.
# NOTE(review): newer IPython releases deprecate this helper in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against
# the installed IPython version.
set_matplotlib_formats('retina')

In [None]:
import gc

import numpy as np
from tqdm import tqdm


from linkpred.utils import neighbors, graph_neighbors
from linkpred.metrics import (adamic_adar_score, common_neighbors_score,
                             res_allocation)
from linkpred.metrics import compute_metrics, feature_vector

from linkpred.metrics import get_node_features

In [None]:
# Read the edge list (one space-separated pair of vertex ids per line).
# Vertex ids are parsed as integers rather than np.loadtxt's default float64,
# and ndmin=2 keeps the (n_edges, 2) shape even for a single-edge file.
test_list = np.loadtxt(
    "data/facebook_combined.txt", delimiter=" ", dtype=np.int64, ndmin=2
)
# gt.Graph() is created with its default settings; graph_edges_split later
# looks at it through undirected views.
gt_fc = gt.Graph()
gt_fc.add_edge_list(test_list)

In [None]:
def graph_edges_split(G, p, seed=None):
    """Randomly partition the edges of ``G`` into a train view and a test view.

    ``int(num_edges * p)`` edges are held out for the test view and the
    remaining edges form the train view.  Both results are undirected
    ``gt.GraphView`` objects over ``G`` carrying complementary boolean edge
    filters; the filters are set on the views, not on ``G``.

    Parameters
    ----------
    G : graph
        Object exposing ``num_edges()`` that ``gt.GraphView`` can wrap.
    p : float
        Fraction of edges assigned to the test view, in ``[0, 1]``.
    seed : int, optional
        Seed for a private ``numpy.random.RandomState``.  When ``None``
        (the default) the global NumPy random state is used, exactly as in
        the seedless version of this function.

    Returns
    -------
    tuple
        ``(train_graph, test_graph)``.

    Raises
    ------
    ValueError
        If ``p`` is not within ``[0, 1]`` (this also rejects NaN).
    """
    if not 0 <= p <= 1:
        raise ValueError("p must be within [0, 1], got {!r}".format(p))

    n_edges = G.num_edges()
    # Built-in int/bool are used here: the np.int / np.bool aliases were
    # deprecated in NumPy 1.20 and removed in 1.24.
    n_test = int(n_edges * p)

    # False for the first n_test slots, True for the rest, then shuffled so
    # the held-out edges are chosen uniformly at random.
    train_mask = np.ones(n_edges, dtype=bool)
    train_mask[:n_test] = False
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(train_mask)
    test_mask = ~train_mask

    train_graph = gt.GraphView(G, directed=False)
    test_graph = gt.GraphView(G, directed=False)

    # NOTE(review): assigning to ``.a`` assumes the property array has exactly
    # num_edges() entries — TODO confirm for graphs that had edges removed.
    prop_train = train_graph.new_edge_property("bool")
    prop_train.a = train_mask

    prop_test = test_graph.new_edge_property("bool")
    prop_test.a = test_mask

    train_graph.set_edge_filter(prop_train)
    test_graph.set_edge_filter(prop_test)

    return train_graph, test_graph


def extract_features(G, metrics, file, p=0.1, k_neighbors=2):
    """Compute per-vertex feature rows for ``G`` and write them to ``file``.

    The edges of ``G`` are split with ``graph_edges_split(G, p=p)``.
    Neighbourhood information is computed on the train graph with
    ``graph_neighbors``.  For every vertex of the train graph,
    ``get_node_features`` yields rows; each row is turned into a string by
    ``feature_vector(test_graph, row)`` and written on its own line.

    Parameters
    ----------
    G : graph
        Graph passed to ``graph_edges_split``.
    metrics : list
        Metric callables forwarded unchanged to ``get_node_features``.
    file : str
        Output path; opened in ``"w"`` mode, so existing content is replaced.
    p : float, optional
        Fraction forwarded to ``graph_edges_split`` (default 0.1).
    k_neighbors : int, optional
        Forwarded to ``graph_neighbors`` (default 2).

    Returns
    -------
    None
        Output goes to ``file``; ``"DONE"`` is printed when finished.
    """
    train_graph, test_graph = graph_edges_split(G, p=p)
    nodes_info = graph_neighbors(train_graph, k_neighbors=k_neighbors)

    with open(file, "w") as out:
        # Hand tqdm the vertex collection itself instead of enumerate(...):
        # the index was never used, and tqdm can only report a total/ETA for
        # iterables that expose a length.
        for node in tqdm(train_graph.get_vertices()):
            scores = get_node_features(train_graph, nodes_info, metrics, node)
            out.writelines(
                feature_vector(test_graph, row) + "\n" for row in scores
            )
    print("DONE")

In [None]:
# Scoring functions handed to the first extract_features run.
metrics = [common_neighbors_score, adamic_adar_score]

In [None]:
# Run feature extraction on gt_fc with the two-metric list, leaving p and
# k_neighbors at their defaults; rows are written to data/fc_features.csv.
extract_features(gt_fc, metrics, file="data/fc_features.csv")

In [None]:
# Scoring functions for the k_neighbors=1 run: the two metrics used before
# plus res_allocation.
metrics_first = [common_neighbors_score, adamic_adar_score, res_allocation]

In [None]:
# Same extraction restricted to k_neighbors=1, written to its own file.
extract_features(
    gt_fc,
    metrics_first,
    file="data/fc_features_first.csv",
    k_neighbors=1,
)