# Converting BEL graphs into inputs for GAT2VEC.

In [1]:
import os

from GAT2VEC import paths as gat2vec_paths
from networkx import DiGraph, write_adjlist
from networkx.relabel import convert_node_labels_to_integers
from pybel import from_url, from_path
from pybel.dsl import BaseEntity, protein, rna
from pybel.struct.mutation.collapse import collapse_to_genes, collapse_all_variants
from pybel_tools.assembler.reified_graph import assembler
import bio2bel_phewascatalog

In [2]:
basedir = '/home/mau/Uni/Thesis/bel_data/tau'
tau_dict = {
    'basedir': basedir,
    'url': 'https://raw.githubusercontent.com/neurommsig/neurommsig-knowledge' + \
           '/master/neurommsig_knowledge/Tau%20protein%20subgraph.bel',
    'localfile': os.path.join(basedir, "Tau_subgraph.bel"),
    'struct_file': os.path.join(basedir, "tau_graph.adjlist"),
    'attr_file': os.path.join(basedir, "tau_na.adjlist")
}
basedir = '/home/mau/Uni/Thesis/bel_data/alzh'
gsk3_dict = {
    'basedir': basedir,
    'url': 'https://raw.githubusercontent.com/neurommsig/neurommsig-knowledge/' + \
           'master/neurommsig_knowledge/GSK3%20subgraph.bel',
    'localfile': os.path.join(basedir, "alzheimers.bel"),
    'struct_file': os.path.join(basedir, "alzh_graph.adjlist"),
    'attr_file': os.path.join(basedir, "alzh_na.adjlist")
}
param_dict = tau_dict

In [3]:
basedir = param_dict['basedir']

# possible inputs
url = param_dict['url']
local = param_dict['localfile']

# Output files 
struct_file = param_dict['struct_file']
attr_file = param_dict['attr_file']

## Read and reify a BEL graph.

In [4]:
# graph = from_url(url)
graph = from_path(local)

print("Nodes before", len(graph.nodes))
print("Edges before", len(graph.edges))

# Collapse all variants is removing also the pmod(X) from the BaseEntity
collapse_all_variants(graph)
# TODO collapse_to_genes removes the ptm information before converting
# collapse_to_genes(graph)

print("Nodes collapsed", len(graph.nodes))
print("Edges collapsed", len(graph.edges))

# Reify the edges
rbg = assembler.reify_bel_graph(graph)

print("Nodes after", len(rbg.nodes))
print("Edges after", len(rbg.edges))

/home/mau/Uni/Thesis/bel_data/tau/Tau_subgraph.bel:0:0 MissingMetadataException Missing required document metadata: Authors
/home/mau/Uni/Thesis/bel_data/tau/Tau_subgraph.bel:0:0 MissingMetadataException Missing required document metadata: Description


Nodes before 284
Edges before 836
Nodes collapsed 230
Edges collapsed 772


































































Nodes after 625
Edges after 892


## Assess the converted entity and predicate nodes.

In [5]:
qty_predicate = {}
qty_prot, qty_rna = 0, 0
for i in rbg.nodes:
    if isinstance(i, BaseEntity):
        if isinstance(i, rna):
            qty_rna += 1
        elif isinstance(i, protein):
            qty_prot += 1
    else:
        if rbg.nodes[i]['label'] in qty_predicate:
            qty_predicate[rbg.nodes[i]['label']] += 1
        else:
            qty_predicate[rbg.nodes[i]['label']] = 1

print("Predicates")
print(qty_predicate)

print(f"Proteins {qty_prot}")
print(f"RNA      {qty_rna}")

Predicates
{'abundance': 5491, 'degradates': 156, 'activates': 640, 'translates': 490, 'hasComponent': 1670}
Proteins 1292
RNA      336


## Adding PheWAS annotation.

In [10]:
file = "/home/mau/Uni/Thesis/Phewas-catalog/phewas/phewas-catalog.csv"
attr_rbg = DiGraph()

# pw_dict = phewasgraph.PhewasAssembler.to_dict(file)

phewas_manager = bio2bel_phewascatalog.Manager()
pw_dict = phewas_manager.to_bel()
print("bu")
for node in rbg.nodes:
    if ((isinstance(node, protein) or
         isinstance(node, rna)) and
            node['name'] in pw_dict):
        for odds, phtype in pw_dict[node['name']]:
            if phtype:
                attr_rbg.add_edge(node, phtype, label=odds)

print("before phewas", len(rbg.nodes))
print("after phewas", len(attr_rbg.nodes))

True




PheWAS Catalog - generating BEL:   0%|          | 0/215107 [00:00<?, ?it/s][A[A

PheWAS Catalog - generating BEL:   0%|          | 1/215107 [00:00<23:42:16,  2.52it/s][A[A

PheWAS Catalog - generating BEL:   0%|          | 4/215107 [00:00<17:13:38,  3.47it/s][A[A

PheWAS Catalog - generating BEL:   0%|          | 7/215107 [00:00<12:40:28,  4.71it/s][A[A

PheWAS Catalog - generating BEL:   0%|          | 11/215107 [00:00<9:27:16,  6.32it/s][A[A

AttributeError: 'NoneType' object has no attribute 'identifier'



PheWAS Catalog - generating BEL:   0%|          | 11/215107 [00:19<9:27:16,  6.32it/s][A[A

## Write adjacency list to file for GAT2VEC

In [17]:
# Structure graph

out_rbg = convert_node_labels_to_integers(rbg, first_label=1, label_attribute='old_label')
write_adjlist(out_rbg, struct_file)

# Attribute graph
out_rbg = convert_node_labels_to_integers(merged_rbg, first_label=1,label_attribute='old_label')
write_adjlist(out_rbg, attr_file)

print([(j, len(i)) for j, i in pw_dict.items() if len(i) > 500])

NameError: name 'merged_rbg' is not defined

In [4]:
"done"

'done'