In [1]:
from grape.edge_prediction import PerceptronEdgePrediction
from grape.embedders import FirstOrderLINEEnsmallen
from grape.datasets.string import HomoSapiens

In [2]:
%%time
graph = HomoSapiens()\
    .filter_from_ids(min_edge_weight=700)\
    .remove_disconnected_nodes()

CPU times: user 18.8 s, sys: 638 ms, total: 19.4 s
Wall time: 8.36 s


In [3]:
# Split the graph into a training graph and a test graph, requesting a
# training share of 0.7.
# NOTE(review): going by its name, `connected_holdout` is presumably meant to
# keep the training graph connected -- confirm against the grape docs.
train, test = graph.connected_holdout(
    train_size=0.7,
)

In [4]:
%%time
embedding = FirstOrderLINEEnsmallen().fit_transform(train)

CPU times: user 38.1 s, sys: 58.5 ms, total: 38.1 s
Wall time: 1.71 s


In [9]:
%%time
model = PerceptronEdgePrediction(
    edge_features=None,
    number_of_edges_per_mini_batch=32,
    edge_embeddings="CosineSimilarity"
)
model.fit(
    graph=train, 
    node_features=embedding
)

CPU times: user 11min 8s, sys: 2min 2s, total: 13min 11s
Wall time: 37.3 s


There are a number of possible prediction methods:

In [6]:
# Collect the names of all model attributes that begin with "predict".
[attribute for attribute in dir(model) if attribute.startswith("predict")]

['predict',
 'predict_bipartite_graph_from_edge_node_ids',
 'predict_bipartite_graph_from_edge_node_names',
 'predict_bipartite_graph_from_edge_node_prefixes',
 'predict_bipartite_graph_from_edge_node_types',
 'predict_clique_graph_from_node_ids',
 'predict_clique_graph_from_node_names',
 'predict_clique_graph_from_node_prefixes',
 'predict_clique_graph_from_node_types',
 'predict_proba',
 'predict_proba_bipartite_graph_from_edge_node_ids',
 'predict_proba_bipartite_graph_from_edge_node_names',
 'predict_proba_bipartite_graph_from_edge_node_prefixes',
 'predict_proba_bipartite_graph_from_edge_node_types',
 'predict_proba_clique_graph_from_node_ids',
 'predict_proba_clique_graph_from_node_names',
 'predict_proba_clique_graph_from_node_prefixes',
 'predict_proba_clique_graph_from_node_types']

In [7]:
%%time
# A perfect model should correctly predict the existance
# of all of these edges.
model.predict_proba(
    graph=test,
    node_features=embedding,
    return_predictions_dataframe=True
)

CPU times: user 86.7 ms, sys: 528 µs, total: 87.2 ms
Wall time: 13.3 ms


Unnamed: 0,predictions,sources,destinations
0,0.460152,0,13188
1,0.557793,0,13510
2,0.464734,0,15031
3,0.509990,0,15209
4,0.469962,0,15389
...,...,...,...
151785,0.456929,16813,13104
151786,0.466778,16813,14274
151787,0.599738,16813,14471
151788,0.486138,16813,14862


In [8]:
%%time
# A perfect model should correctly predict the non-existance
# of all of these edges.
model.predict_proba(
    graph=graph.sample_negative_graph(number_of_negative_samples=test.get_number_of_edges()),
    node_features=embedding,
    return_predictions_dataframe=True
)

CPU times: user 227 ms, sys: 0 ns, total: 227 ms
Wall time: 28.3 ms


Unnamed: 0,predictions,sources,destinations
0,0.456567,0,39
1,0.568560,0,209
2,0.474594,0,1461
3,0.627093,0,1925
4,0.522914,0,2687
...,...,...,...
151785,0.418893,16813,13721
151786,0.530282,16813,15144
151787,0.436179,16813,15251
151788,0.482228,16813,15928


Let's now build a better evaluation using the edge prediction pipeline: