In [1]:
import matplotlib.pyplot as plt
import numpy as np
from grape.embedders import FirstOrderLINEEnsmallen
from grape import Graph
from glob import glob

import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
RANDOM_SEED = 12
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

random_seed = random.seed
print(random.seed)
print(np.random.get_state())

<bound method Random.seed of <random.Random object at 0x138051c10>>
('MT19937', array([        12,  274362557, 2871959443,   96505144, 3440553756,
       3320819264, 1671863157, 2853726411, 3576948821, 2267721719,
       1471449267,  205467717,  329644101, 2017124422,  120683281,
       3671631556, 3425389459, 2306138081, 2854342561, 2833917538,
       2139784180, 1777806526, 3803176561, 3129722129, 3441486743,
       1830495613, 3911788038,  683126036, 2275605760, 2143413223,
       1841233884, 3193124688, 3131946362,  854446201,  464715999,
       3543117598, 3696335765,  563062867, 3171341285, 2053716298,
       3493445055, 2373285717, 2744558717, 1005575494, 2549531338,
       3054493973,  794434881,  553407444, 3342173908, 2703770116,
       1986085520, 3203783784, 2832904198,  647337417,  715893123,
       2584083430,  191194412,  374961813, 3419702787, 3499023931,
       1892787284,  234508998, 1165532764, 2246131696,  884260026,
        627310499,   95884945,  105283704, 147383

In [3]:
node_path = glob("../data/merged/merged-kg_nodes.tsv", recursive=True)[0]
edge_path = glob("../data/merged/merged-kg_edges.tsv", recursive=True)[0]

In [4]:
kg_microbe = Graph.from_csv(
    # Edges related parameters

    ## The path to the edges list tsv
    edge_path=edge_path,
    ## Set the tab as the separator between values
    edge_list_separator="\t",
    ## The first rows should NOT be used as the columns names
    edge_list_header=True,
    ## The source nodes are in the first nodes
    sources_column="subject",
    #sources_column_number=1,
    ## The destination nodes are in the second column
    destinations_column="object",
    #destinations_column_number=4,
    ## Both source and destinations columns use numeric node_ids instead of node names
    edge_list_numeric_node_ids=False,
    ## The weights are in the third column
    weights_column_number=None,

    #edge_type_path=edge_path,
    edge_list_edge_types_column="predicate",    

    # Nodes related parameters
    ## The path to the nodes list tsv
    node_path=node_path,
    ## Set the tab as the separator between values
    node_list_separator="\t",
    ## The first rows should be used as the columns names
    node_list_header=True,
    ## The column with the node names is the one with name "node_name".
    nodes_column="id",
    #nodes_column_number=1,

    #node_type_path=node_path,
    node_list_node_types_column="category",
    
    # Graph related parameters
    ## The graph is undirected
    directed=False,
    ## The name of the graph is HomoSapiens
    name="kg_microbe",
    ## Display a progress bar, (this might be in the terminal and not in the notebook)
    verbose=True,
)

In [5]:
kg_microbe


In [17]:
edge_type_ids = kg_microbe.get_edge_type_id_counts_hashmap()
#kg_microbe.get_edge_type_names_counts_hashmap()
#edge_type_ids
keys = edge_type_ids.keys()

for i in keys:
    print(str(i)+"\t"+kg_microbe.get_edge_type_name_from_edge_type_id(i))

5	biolink:interacts_with
4	biolink:location_of
8	biolink:has_chemical_role
10	biolink:inverseOf
11	biolink:has_part
6	biolink:subPropertyOf
3	biolink:has_phenotype
7	biolink:related_to
0	biolink:occurs_in
2	biolink:capable_of
1	biolink:subclass_of
9	biolink:type


In [28]:
?Graph.filter_from_ids

[0;31mSignature:[0m
[0mGraph[0m[0;34m.[0m[0mfilter_from_ids[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mself[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m/[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_ids_to_keep[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_ids_to_remove[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_names_to_keep_from_graph[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_names_to_remove_from_graph[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_prefixes_to_keep[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_prefixes_to_remove[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_type_ids_to_keep[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_type_ids_to_remove[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_type_id_to_keep[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_type_id_to_remove[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msource_node_ids_to_keep[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msource_node_ids_to_re

In [30]:
kg_microbe_notaxmed = kg_microbe.filter_from_ids(edge_type_ids_to_remove=[0])

In [31]:
kg_microbe_notaxmed

In [32]:
%%time
from grape.embedders import DeepWalkSkipGramEnsmallen

model = DeepWalkSkipGramEnsmallen(
    normalize_by_degree=True
)

degree_normalized_deepwalk_embedding_notaxmed = model.fit_transform(kg_microbe_notaxmed)



KeyboardInterrupt: 

In [None]:
type(degree_normalized_deepwalk_embedding_notaxmed)

In [21]:
import datetime

import pickle
import pandas as pd

current_datetime = datetime.datetime.now()
formatted_date = current_datetime.strftime("%Y-%m-%d_%H_%M_%S")

with open('degree_normalized_deepwalk_embedding_notaxmed_'+formatted_date+'.pkl', 'wb') as file:
    # Use the 'wb' mode to write in binary mode
    pickle.dump(degree_normalized_deepwalk_embedding_notaxmed, file)


NameError: name 'degree_normalized_deepwalk_embedding_notaxmed' is not defined

In [None]:
node_embed = degree_normalized_deepwalk_embedding_notaxmed.get_all_node_embedding()#to_csv(formatted_date+".tsv",sep="\t")

In [None]:
node_embed[1]


In [None]:
node_embed_df = pd.DataFrame(node_embed[0])
node_embed_df.to_csv("degree_normalized_deepwalk_embedding_notaxmed_"+formatted_date+".tsv", sep="\t")

In [None]:
node_embed_df = pd.DataFrame(node_embed[1])
node_embed_df.to_csv("degree_normalized_deepwalk_embedding_notaxmed_1_"+formatted_date+".tsv", sep="\t")

In [None]:
from grape import GraphVisualizer

GraphVisualizer(kg_microbe).fit_and_plot_all(degree_normalized_deepwalk_embedding_notaxmed)
