# Ontology overview
This notebook converts the `go.obo` file into a pandas DataFrame and exports it as CSV. It also defines the function `check_all_paths`, which takes a graph previously loaded with obonet and a list of Gene Ontology (GO) ids. The function looks for a path from each id to every id that follows it in the list and returns all paths found, each one as a list of edges, in this format: `[[(node1, node2, relation), (node2, node3, relation), ...], [...]]`.

Planned improvements:
- Convert the code to an object-oriented design and expose it to the frontend through, e.g., FastAPI
- Optimize the `check_all_paths` function and use multithreading

In [1]:
import obonet
import networkx as nx
import pandas as pd

# Parse the Gene Ontology OBO file into a graph whose nodes are ontology terms.
graph = obonet.read_obo('go.obo')

# Node table: the node attributes become columns and the node key is exposed as `id`.
nodes_df = (
    pd.DataFrame
    .from_dict(dict(graph.nodes(data=True)), orient='index')
    .reset_index()
    .rename(columns={'index': 'id'})
)
print("Nodes DataFrame Head:")
print(nodes_df.head())

# Edge table built from the same graph.
edges_df = nx.to_pandas_edgelist(graph)
print("Edges DataFrame Head:")
print(edges_df.head())


# Both frames are shown once more as the final result of this cell.
print("Final Nodes DataFrame:")
print(nodes_df.head())
print("Final Edges DataFrame:")
print(edges_df.head())

Nodes DataFrame Head:
           id                                               name  \
0  GO:0000001                          mitochondrion inheritance   
1  GO:0000002                   mitochondrial genome maintenance   
2  GO:0000006  high-affinity zinc transmembrane transporter a...   
3  GO:0000007  low-affinity zinc ion transmembrane transporte...   
4  GO:0000009             alpha-1,6-mannosyltransferase activity   

            namespace                                                def  \
0  biological_process  "The distribution of mitochondria, including t...   
1  biological_process  "The maintenance of the structure and integrit...   
2  molecular_function  "Enables the transfer of zinc ions (Zn2+) from...   
3  molecular_function  "Enables the transfer of a solute or solutes f...   
4  molecular_function  "Catalysis of the transfer of a mannose residu...   

                                             synonym  \
0             ["mitochondrial inheritance" EXACT []]   


In [2]:
# Preview the first rows of the node table built from the ontology graph.
nodes_df.head()

Unnamed: 0,id,name,namespace,def,synonym,is_a,xref,property_value,alt_id,created_by,creation_date,subset,relationship,intersection_of,comment,disjoint_from
0,GO:0000001,mitochondrion inheritance,biological_process,"""The distribution of mitochondria, including t...","[""mitochondrial inheritance"" EXACT []]","[GO:0048308, GO:0048311]",,,,,,,,,,
1,GO:0000002,mitochondrial genome maintenance,biological_process,"""The maintenance of the structure and integrit...",,[GO:0007005],,,,,,,,,,
2,GO:0000006,high-affinity zinc transmembrane transporter a...,molecular_function,"""Enables the transfer of zinc ions (Zn2+) from...","[""high affinity zinc uptake transmembrane tran...",[GO:0005385],,,,,,,,,,
3,GO:0000007,low-affinity zinc ion transmembrane transporte...,molecular_function,"""Enables the transfer of a solute or solutes f...",,[GO:0005385],,,,,,,,,,
4,GO:0000009,"alpha-1,6-mannosyltransferase activity",molecular_function,"""Catalysis of the transfer of a mannose residu...","[""1,6-alpha-mannosyltransferase activity"" EXAC...",[GO:0000030],"[Reactome:R-HSA-449718 ""Addition of a third ma...",[term_tracker_item https://github.com/geneonto...,,,,,,,,


In [3]:
# Persist the node table to CSV; index=False leaves the positional row index out of the file.
nodes_df.to_csv('go_nodes.csv', index=False)

In [4]:
def _shortest_path_with_relations(graph, source, target):
    """Return the shortest source -> target path as (start, end, relation) edges, or None if unreachable."""
    try:
        # A single search both answers "is there a path?" and yields the path itself.
        path = nx.shortest_path(graph, source, target)
    except nx.NetworkXNoPath:
        return None

    path_with_relations = []
    for start_node, end_node in zip(path, path[1:]):
        relations = graph[start_node][end_node]
        # Only the first key stored for this node pair is reported as the relation;
        # any further parallel relations between the same two nodes are ignored.
        relation_type = next(iter(relations), None)
        path_with_relations.append((start_node, end_node, relation_type))
    return path_with_relations


def check_all_paths(graph, ontology_ids, *, both_directions=False):
    """Find shortest paths, annotated with relation types, between ontology ids.

    Every pair of ids is visited once, in list order. For each pair the
    shortest path from the earlier id to the later id is looked up; pairs
    without such a path contribute nothing to the result.

    Args:
        graph: networkx graph of the ontology (expected to be the graph
            returned by ``obonet.read_obo``). ``graph[u][v]`` must be a mapping
            whose first key names the relation between ``u`` and ``v``.
        ontology_ids: sequence of node ids to compare pairwise.
        both_directions: when True, the path from the later id back to the
            earlier id is searched as well, so the result no longer depends on
            the order of ``ontology_ids``. Defaults to False, which searches
            only in list order.

    Returns:
        A list with one entry per path found. Each entry is a list of
        ``(start_node, end_node, relation_type)`` tuples, one per edge of the
        path, in walking order. Empty when ``ontology_ids`` has fewer than two
        distinct ids or no paths exist.

    Raises:
        Any networkx error other than ``NetworkXNoPath`` (for example for an id
        that is not a node of ``graph``) propagates unchanged.
    """
    found_paths_with_relations = []
    for i, node1 in enumerate(ontology_ids):
        for node2 in ontology_ids[i + 1:]:
            if node1 == node2:
                # A duplicated id would produce a zero-edge path, i.e. an empty entry.
                continue

            directions = [(node1, node2)]
            if both_directions:
                directions.append((node2, node1))

            for source, target in directions:
                path_with_relations = _shortest_path_with_relations(graph, source, target)
                if path_with_relations:
                    found_paths_with_relations.append(path_with_relations)

    return found_paths_with_relations

In [6]:
import json

# The pairwise search in check_all_paths visits every pair of ids, so only a sample is analysed.
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42  # fixed so that re-running the notebook selects the same ids

# Never ask for more ids than exist: sampling without replacement would raise.
ontology_ids = (
    nodes_df['id']
    .sample(min(SAMPLE_SIZE, len(nodes_df)), random_state=SAMPLE_SEED)
    .tolist()
)
found_paths_with_relations = check_all_paths(graph, ontology_ids)

# Each (start, end, relation) tuple is written as a JSON array.
with open('found_paths_with_relations.json', 'w') as f:
    json.dump(found_paths_with_relations, f, indent=4)

In [7]:
# Display every path found; each entry is a list of (start, end, relation) tuples.
found_paths_with_relations

[[('GO:0050648', 'GO:1901567', 'is_a')],
 [('GO:2001278', 'GO:2000284', 'is_a'),
  ('GO:2000284', 'GO:0045764', 'is_a'),
  ('GO:0045764', 'GO:0006521', 'is_a'),
  ('GO:0006521', 'GO:0010565', 'is_a')],
 [('GO:1905012', 'GO:0034354', 'regulates')],
 [('GO:1905012', 'GO:0090357', 'is_a'),
  ('GO:0090357', 'GO:0006521', 'is_a'),
  ('GO:0006521', 'GO:0010565', 'is_a')],
 [('GO:0045982', 'GO:0006144', 'negatively_regulates')],
 [('GO:1902675', 'GO:0036064', 'is_a'),
  ('GO:0036064', 'GO:0005929', 'part_of'),
  ('GO:0005929', 'GO:0120025', 'is_a'),
  ('GO:0120025', 'GO:0042995', 'is_a')],
 [('GO:2000750', 'GO:2000100', 'is_a')],
 [('GO:2000317', 'GO:0072538', 'negatively_regulates')],
 [('GO:0034675', 'GO:0008305', 'is_a'), ('GO:0008305', 'GO:0098802', 'is_a')],
 [('GO:0098957', 'GO:0008089', 'is_a'),
  ('GO:0008089', 'GO:0098930', 'is_a'),
  ('GO:0098930', 'GO:0008088', 'is_a'),
  ('GO:0008088', 'GO:0043005', 'occurs_in'),
  ('GO:0043005', 'GO:0120025', 'is_a'),
  ('GO:0120025', 'GO:0042995