In [39]:
from grape.edge_prediction import PerceptronEdgePrediction
from grape.embedders import FirstOrderLINEEnsmallen
from grape import Graph
from glob import glob


In [40]:

def _resolve_single_path(pattern):
    """Return the first path matching `pattern`, failing loudly if none exists.

    Parameters
    ----------
    pattern: str
        A glob pattern (or plain path) pointing at one of the merged KG files.

    Returns
    -------
    str
        The lexicographically first matching path.

    Raises
    ------
    FileNotFoundError
        If nothing matches, instead of the opaque IndexError that
        `glob(...)[0]` would produce.
    """
    # Sorting makes the choice deterministic should the pattern ever
    # match more than one file.
    matches = sorted(glob(pattern, recursive=True))
    if not matches:
        raise FileNotFoundError(
            f"No file matches {pattern!r}; has the merged knowledge graph been generated?"
        )
    return matches[0]


# Node and edge lists of the merged knowledge graph.
node_path = _resolve_single_path("../data/merged/merged-kg_nodes.tsv")
edge_path = _resolve_single_path("../data/merged/merged-kg_edges.tsv")

In [41]:
# Edge list options: a tab-separated file whose first row is a header.
# Sources come from the "subject" column, destinations from the "object"
# column and edge types from the "predicate" column. No weights column is
# specified, and node ids are not flagged as numeric.
edge_list_options = dict(
    edge_path=edge_path,
    edge_list_separator="\t",
    edge_list_header=True,
    sources_column="subject",
    destinations_column="object",
    edge_list_numeric_node_ids=False,
    weights_column_number=None,
    edge_list_edge_types_column="predicate",
)

# Node list options: a tab-separated file whose first row is a header.
# Node names come from the "id" column and node types from "category".
node_list_options = dict(
    node_path=node_path,
    node_list_separator="\t",
    node_list_header=True,
    nodes_column="id",
    node_list_node_types_column="category",
)

# Graph-level options: undirected graph named "kg_microbe". With verbose=True
# a progress bar is shown (it may appear in the terminal rather than in the
# notebook).
kg_microbe = Graph.from_csv(
    **edge_list_options,
    **node_list_options,
    directed=False,
    name="kg_microbe",
    verbose=True,
)

In [42]:
# Rebind `kg_microbe` to the graph returned by remove_disconnected_nodes().
# The holdout split and the negative sampling in later cells both start from
# this rebinding, not from the graph as originally loaded.
kg_microbe = kg_microbe.remove_disconnected_nodes()

In [43]:
# Split the graph into a training graph and a held-out test graph using
# train_size=0.75. The seed is fixed so that Restart & Run All reproduces the
# same split, and with it the same embedding, model and evaluation.
train, test = kg_microbe.connected_holdout(
    train_size=0.75,
    random_state=42,
)

In [44]:
%%time
embedding = FirstOrderLINEEnsmallen().fit_transform(train)

CPU times: user 2min 53s, sys: 798 ms, total: 2min 53s
Wall time: 21.1 s


In [45]:
%%time
model = PerceptronEdgePrediction(
    edge_features=None,
    number_of_edges_per_mini_batch=32,
    edge_embeddings="CosineSimilarity"
)
model.fit(
    graph=train, 
    node_features=embedding
)

CPU times: user 1h 32min 11s, sys: 3h 44min 54s, total: 5h 17min 6s
Wall time: 43min 28s


In [46]:
%%time
# A perfect model should correctly predict the existence
# of all of these edges.
model.predict_proba(
    graph=test,
    node_features=embedding,
    return_predictions_dataframe=True
)

ValueError: The density of the support graph has changed too much from the training graph. The density of the training graph was 2.0385230173922012e-06 while the density of the support graph is 6.381506176570104e-07. This may be due to a change in the graph structure or the provided support graph is not the one used during training. If you are using the same graph, please do provide the same support graph during prediction as you did during training.

In [47]:
%%time
# A perfect model should correctly predict the non-existance
# of all of these edges.
model.predict_proba(
    graph=kg_microbe.sample_negative_graph(number_of_negative_samples=test.get_number_of_edges()),
    node_features=embedding,
    return_predictions_dataframe=True
)

ValueError: The density of the support graph has changed too much from the training graph. The density of the training graph was 2.0385230173922012e-06 while the density of the support graph is 3.417185360828474e-07. This may be due to a change in the graph structure or the provided support graph is not the one used during training. If you are using the same graph, please do provide the same support graph during prediction as you did during training.