In [1]:
!pip install grape

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# Unpack the merged KG archive only when either extracted TSV is missing, so
# re-running this cell is cheap once the data is in place.
import os
import subprocess

if not (
    os.path.exists("data/merged-kg_edges.tsv")
    and os.path.exists("data/merged-kg_nodes.tsv")
):
    # check=True raises CalledProcessError when tar exits non-zero (for example
    # when the archive is absent) instead of silently carrying on.
    # NOTE(review): tar extracts relative to the current working directory, so
    # the files tested above only appear if the archive members are stored under
    # a data/ prefix -- confirm against the archive layout.
    subprocess.run(["tar", "-xzf", "data/merged-kg.tar.gz"], check=True)

In [None]:
from grape import Graph

# Build the KG Microbe graph from the tab-separated node and edge lists.
# Column names marked "KG-Hub" are always the same for KG-Hub KGs.
kg_microbe = Graph.from_csv(
    # --- graph-level settings ---
    name="KG Microbe",
    directed=False,
    # --- node list ---
    node_path="data/merged-kg_nodes.tsv",
    node_list_separator="\t",
    node_list_header=True,  # KG-Hub
    nodes_column="id",  # KG-Hub
    node_list_node_types_column="category",  # KG-Hub
    node_types_separator="|",
    # --- edge list ---
    edge_path="data/merged-kg_edges.tsv",
    edge_list_separator="\t",
    edge_list_header=True,  # KG-Hub
    sources_column="subject",  # KG-Hub
    destinations_column="object",  # KG-Hub
    edge_list_edge_types_column="predicate",
    # --- loading strategy ---
    # No explicit node-type / edge-type lists are supplied, so the vocabulary
    # is built while reading. Doing that in parallel would make the type order
    # non-deterministic; both lists are therefore read sequentially.
    load_node_list_in_parallel=False,
    load_edge_list_in_parallel=False,
)
kg_microbe

In [None]:
# Rebind kg_microbe to the result of remove_disconnected_nodes(); every later
# cell (embedding, visualization) works on this filtered graph.
# NOTE(review): going by the method name this drops nodes without any edge --
# confirm the exact definition in the grape documentation.
kg_microbe = kg_microbe.remove_disconnected_nodes()
# Bare last expression so the notebook renders the graph's report.
kg_microbe

In [12]:
# List the node-embedding models grape reports. The rendered table gives, per
# model, its library, whether it is available, and which node/edge types,
# features and weights it requires or can use.
import grape
grape.get_available_models_for_node_embedding()

Unnamed: 0,model_name,task_name,library_name,available,requires_node_types,can_use_node_types,requires_edge_types,can_use_edge_types,requires_edge_type_features,can_use_edge_type_features,requires_edge_features,can_use_edge_features,requires_edge_weights,can_use_edge_weights,requires_positive_edge_weights
0,Second-order LINE,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,False,False,False
2,DeepWalk SkipGram,Node Embedding,Ensmallen,True,False,True,False,True,False,False,False,False,False,True,True
4,RUINE,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,False,False,False
5,SocioDim,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,False,False,False
7,WeightedSPINE,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,True,True,True
8,Degree-based SPINE,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,False,False,False
9,Structured Embedding,Node Embedding,Ensmallen,True,False,False,True,True,False,False,False,False,False,False,False
11,Walklets CBOW,Node Embedding,Ensmallen,True,False,True,False,True,False,False,False,False,False,True,True
12,First-order LINE,Node Embedding,Ensmallen,True,False,False,False,False,False,False,False,False,False,False,False
14,TransE,Node Embedding,Ensmallen,True,False,False,True,True,False,False,False,False,False,False,False


In [26]:
# Fit a DeepWalk SkipGram embedding (Ensmallen implementation) on kg_microbe.
import grape 

# 200-dimensional embedding with the normalize_by_degree option switched on.
sg = grape.embedders.DeepWalkSkipGramEnsmallen(normalize_by_degree=True, embedding_size=200)
# The fitted result is reused by the export and visualization cells below.
degree_normalized_deepwalk_embedding = sg.fit_transform(kg_microbe)

In [27]:
import datetime
import os

import matplotlib.pyplot as plt
import pandas as pd
from grape import GraphVisualizer

# Timestamp that gives this run's output files unique names; it is also reused
# by the plotting cell further down.
current_datetime = datetime.datetime.now()
formatted_date = current_datetime.strftime("%Y-%m-%d_%H_%M_%S")
print(formatted_date)

# Create the output directory up front so the write below cannot fail merely
# because the folder is missing.
os.makedirs("../output", exist_ok=True)

# The accessor is spelled `get_all_node_embedding` (singular). Its result is
# indexed below, and the previously recorded traceback reports it as a list.
node_embed = degree_normalized_deepwalk_embedding.get_all_node_embedding()
# Export only the first embedding in that list.
# NOTE(review): the list may hold more than one embedding for this model --
# confirm that the first one is the one wanted downstream.
node_embed_df = pd.DataFrame(node_embed[0])
node_embed_df.to_csv(
    "../output/DeepWalkSkipGramEnsmallen_degreenorm_embedding_200_" + formatted_date + ".tsv.gz",
    sep="\t",
    compression='gzip',
)

2024-09-10_14_52_33


AttributeError: 'list' object has no attribute 'get_all_node_embedding'

In [None]:
# Keep the GraphVisualizer instance itself in `visualizer` and run the plots
# exactly once. fit_and_plot_all is a method of the visualizer, so its return
# value must not be stored in `visualizer` and called again.
visualizer = GraphVisualizer(kg_microbe)
visualizer.fit_and_plot_all(degree_normalized_deepwalk_embedding)

# Save the current pyplot figure as PNG and PDF, named with the timestamp
# computed in the export cell.
# NOTE(review): if GraphVisualizer displays and closes its figure automatically
# inside notebooks, pyplot has no active figure here and these files would be
# blank -- confirm, and if so save through the figure returned by
# fit_and_plot_all instead.
for extension in ("png", "pdf"):
    plt.savefig(
        '../output/DeepWalkSkipGramEnsmallen_degreenorm_embedding_200_'
        + formatted_date
        + '.'
        + extension
    )
# Release the figure so it does not linger in kernel memory.
plt.close()