### Using the Ontobio API to retrieve the ancestors of GO IDs (`go:id`) and GO terms (`go:term`).

In [1]:
import requests,os,sys,glob
import urllib.request
import time
from bs4 import BeautifulSoup
import re
import pandas as pd
import csv
## Create the ontology factory; it is used further down to build the GO ontology object
from ontobio.ontol_factory import OntologyFactory
ofactory = OntologyFactory()

## GOLR queries
from ontobio.golr.golr_query import GolrAssociationQuery

## rendering ontologies
from ontobio import GraphRenderer
import owl_tools



In [2]:
#!{sys.executable} -m pip install --upgrade pip
#!{sys.executable} -m pip install ontobio
#!{sys.executable} -m pip install obonet
#!{sys.executable} -m  pip install owl-tools
## Autocomplete tab press jedi
#!{sys.executable} -m pip install pyreadline jedi==0.17.2

In [3]:
#!wget http://purl.obolibrary.org/obo/go/snapshot/go.obo
#!wget http://purl.obolibrary.org/obo/go/snapshot/go.owl

### Create the ontology object from the locally downloaded file
###### Date: 19.10.2021


In [4]:
## Location of the local GO OWL file. Set the GO_OWL_PATH environment variable to
## point at another copy instead of editing the notebook; the default is the
## path this notebook was originally run with.
GO_OWL_PATH = os.environ.get(
    "GO_OWL_PATH",
    "/home/marius/Environments/BeautifoulScrap/workingEnv/MGI_GO_BP/go.owl",
)
ont = ofactory.create(GO_OWL_PATH)

In [5]:
## Load GO. Note the first time this runs Jupyter will show '*' - be patient
#ont = ofactory.create("go") 

### Finding descendants
Here we use the in-memory ontology object; no external service calls are made.

Change the value of `term_id` to any GO ID you like.

### Select the term for enrichment

In [6]:
term_id = "GO:0072673" ## lamellipodium morphogenesis

### Descendants

In [7]:
## All descendants of the selected term; reflexive=True also keeps the term itself in the list
descendants = ont.descendants(term_id, reflexive=True)
descendants

['GO:2000394', 'GO:2000393', 'GO:2000392', 'GO:0072673']

In [8]:
## Render the descendant subgraph as an indented text tree
renderer = GraphRenderer.create('tree')
print(renderer.render_subgraph(ont, nodes=descendants))

. GO:0072673 ! lamellipodium morphogenesis
 RO:0002211 GO:2000392 ! regulation of lamellipodium morphogenesis
  % GO:2000393 ! negative regulation of lamellipodium morphogenesis
  % GO:2000394 ! positive regulation of lamellipodium morphogenesis
 RO:0002212 GO:2000393 ! negative regulation of lamellipodium morphogenesis
 RO:0002213 GO:2000394 ! positive regulation of lamellipodium morphogenesis




### Ancestors

In [9]:
## Ancestors of the selected term with no relation filter; reflexive=True keeps the term itself in the list
ancestors = ont.ancestors(term_id,reflexive=True)
ancestors

['GO:0009653',
 'GO:0048869',
 'GO:0097581',
 'GO:0048856',
 'GO:0032989',
 'GO:0032502',
 'GO:0000902',
 'GO:0071840',
 'GO:0120039',
 'GO:0032990',
 'GO:0008150',
 'GO:0048858',
 'GO:0009987',
 'GO:0016043',
 'GO:0030030',
 'GO:0072673',
 'GO:0120036']

In [10]:
## Ancestors of the selected term, restricted to the subClassOf relation
ancestors1 = ont.ancestors(term_id,reflexive=True,relations=['subClassOf'])
ancestors1

['GO:0009653',
 'GO:0048869',
 'GO:0097581',
 'GO:0032989',
 'GO:0032502',
 'GO:0071840',
 'GO:0120039',
 'GO:0032990',
 'GO:0008150',
 'GO:0048858',
 'GO:0009987',
 'GO:0016043',
 'GO:0030030',
 'GO:0072673',
 'GO:0120036']

In [11]:
## Render the subgraph of all ancestors as an indented text tree
renderer = GraphRenderer.create('tree')
print(renderer.render_subgraph(ont, nodes=ancestors))

. GO:0008150 ! biological_process
 % GO:0009987 ! cellular process
  % GO:0071840 ! cellular component organization or biogenesis
   % GO:0016043 ! cellular component organization
    % GO:0030030 ! cell projection organization
     % GO:0120036 ! plasma membrane bounded cell projection organization
      % GO:0097581 ! lamellipodium organization
       % GO:0072673 ! lamellipodium morphogenesis
     % GO:0048858 ! cell projection morphogenesis
      % GO:0120039 ! plasma membrane bounded cell projection morphogenesis
       % GO:0072673 ! lamellipodium morphogenesis
    % GO:0032989 ! cellular component morphogenesis
     % GO:0032990 ! cell part morphogenesis
      % GO:0048858 ! cell projection morphogenesis
       % GO:0120039 ! plasma membrane bounded cell projection morphogenesis
        % GO:0072673 ! lamellipodium morphogenesis
  % GO:0048869 ! cellular developmental process
   % GO:0032989 ! cellular component morphogenesis
    % GO:0032990 ! cell part morphogenesis
     % GO:

In [12]:
## Render the subgraph of the subClassOf-only ancestors as an indented text tree
renderer = GraphRenderer.create('tree')
print(renderer.render_subgraph(ont, nodes=ancestors1))

. GO:0008150 ! biological_process
 % GO:0032502 ! developmental process
  % GO:0009653 ! anatomical structure morphogenesis
   % GO:0032989 ! cellular component morphogenesis
    % GO:0032990 ! cell part morphogenesis
     % GO:0048858 ! cell projection morphogenesis
      % GO:0120039 ! plasma membrane bounded cell projection morphogenesis
       % GO:0072673 ! lamellipodium morphogenesis
  % GO:0048869 ! cellular developmental process
   % GO:0032989 ! cellular component morphogenesis
    % GO:0032990 ! cell part morphogenesis
     % GO:0048858 ! cell projection morphogenesis
      % GO:0120039 ! plasma membrane bounded cell projection morphogenesis
       % GO:0072673 ! lamellipodium morphogenesis
 % GO:0009987 ! cellular process
  % GO:0071840 ! cellular component organization or biogenesis
   % GO:0016043 ! cellular component organization
    % GO:0030030 ! cell projection organization
     % GO:0120036 ! plasma membrane bounded cell projection organization
      % GO:0097581 ! la

In [13]:
## Split every rendered tree line into an [ID, Process] pair: strip the
## surrounding spaces, then the "%" and "." markers, and cut at "!".
tree = [
    line.strip(" ").strip("%").strip(".").split("!")
    for line in renderer.render_subgraph(ont, nodes=ancestors).split("\n")
]
tree_df = pd.DataFrame(tree, columns=["ID", "Process"])
tree_df["Process"] = tree_df["Process"].str.lstrip()

## Show the rows from the top of the tree down to the first row naming the term
go_term = "lamellipodium morphogenesis"
tree_df.iloc[: tree_df["Process"].tolist().index(go_term) + 1]


Unnamed: 0,ID,Process
0,GO:0008150,biological_process
1,GO:0009987,cellular process
2,GO:0071840,cellular component organization or biogenesis
3,GO:0016043,cellular component organization
4,GO:0030030,cell projection organization
5,GO:0120036,plasma membrane bounded cell projection organi...
6,GO:0097581,lamellipodium organization
7,GO:0072673,lamellipodium morphogenesis


In [14]:
## Parse the rendered tree of the subClassOf-only ancestors into [ID, Process]
## pairs: strip the surrounding spaces, then the "%" and "." markers, and cut at "!".
tree = [
    line.strip(" ").strip("%").strip(".").split("!")
    for line in renderer.render_subgraph(ont, nodes=ancestors1).split("\n")
]
tree_df = pd.DataFrame(tree, columns=["ID", "Process"])
tree_df["Process"] = tree_df["Process"].str.lstrip()

## Keep only the rows from the top of the tree down to the first row naming the term
go_term = "lamellipodium morphogenesis"
tree_df = tree_df.iloc[: tree_df["Process"].tolist().index(go_term) + 1]
tree_df

Unnamed: 0,ID,Process
0,GO:0008150,biological_process
1,GO:0032502,developmental process
2,GO:0009653,anatomical structure morphogenesis
3,GO:0032989,cellular component morphogenesis
4,GO:0032990,cell part morphogenesis
5,GO:0048858,cell projection morphogenesis
6,GO:0120039,plasma membrane bounded cell projection morpho...
7,GO:0072673,lamellipodium morphogenesis


In [15]:
## Term names of the truncated tree, as a plain Python list
tree_df["Process"].tolist()

['biological_process',
 'developmental process',
 'anatomical structure morphogenesis',
 'cellular component morphogenesis',
 'cell part morphogenesis',
 'cell projection morphogenesis',
 'plasma membrane bounded cell projection morphogenesis',
 'lamellipodium morphogenesis']

### Function that returns the ancestor path for a given GO:ID and its term name.

In [16]:
def go_writer(go_id,go_term):
    """Return the term names from the top of the ancestor tree down to ``go_term``.

    The ancestors of ``go_id`` (the term itself included) are rendered as a
    text tree; each rendered line is split at "!" into an ID part and a name
    part. The names of all lines up to and including the first line whose name
    equals ``go_term`` are returned, in rendering order (root first).

    Raises ``ValueError`` (from ``list.index``) when ``go_term`` does not occur
    among the rendered names.
    """
    lineage = ont.ancestors(go_id, reflexive=True)
    tree_text = GraphRenderer.create('tree').render_subgraph(ont, nodes=lineage)

    # One row per rendered line: drop indentation/markers, then split ID from name.
    rows = []
    for line in tree_text.split("\n"):
        rows.append(line.strip(" %.").split("!"))

    table = pd.DataFrame(rows, columns=["ID", "Process"])
    table["Process"] = table["Process"].str.lstrip()

    # Position of the first row that names the requested term.
    last_row = table["Process"].tolist().index(go_term)
    return table["Process"].iloc[: last_row + 1].tolist()

In [17]:
## Example: path for "cristae formation", reversed so that the term itself comes first
a = go_writer(go_id="GO:0042407", go_term="cristae formation")
a.reverse()
lists_data = [a]
lists_data

[['cristae formation',
  'inner mitochondrial membrane organization',
  'mitochondrial membrane organization',
  'mitochondrion',
  'intracellular membrane-bounded organelle',
  'intracellular organelle',
  'organelle',
  'cellular anatomical entity',
  'cellular_component']]

In [18]:
#Convert the object from r to python
# NOTE(review): `r` is not defined anywhere in the visible part of this notebook
# and the recorded run of this cell ended in "NameError: name 'r' is not defined".
# Presumably it is an R bridge object (e.g. from rpy2) -- confirm, and create it
# before this cell.
df_py = r.dr_unique_deseq2_go_bp_005_sham_GO_T_ID
# Labels that are used to build the output file name
software = "DESeq2"
species = "Drer"
specific = "SHAM"

def activ_go_writer(software,df_py,species,specific):
    """Annotate every row of ``df_py`` with its ancestor path and append the paths to a CSV.

    For each row, ``go_writer`` is called with the value of the second column
    as ``go_id`` and the value of the first column as ``go_term``; the returned
    path is reversed so that the term itself comes first. ``df_py`` must have
    an "ID" column, whose length sets the number of rows processed.

    ``software``, ``species`` and ``specific`` are only used to build the
    output file name. The file is opened in append mode, so repeated calls add
    rows to an existing file. Returns the list of reversed paths.
    """
    df_py_list = []
    for row in range(len(df_py["ID"])):
        path = go_writer(go_id=df_py.iloc[row, 1], go_term=df_py.iloc[row, 0])
        path.reverse()
        df_py_list.append(path)

    # Output location is assembled from the run labels.
    out_path = (
        "/home/marius/Documents/Projects/prsa/Outputs/"
        + str(species) + "/clustered_heatmap/"
        + str(software) + "/paths_df_unique_"
        + str(software) + "_"
        + str(species) + "_go_bp_"
        + str(specific) + "_MGI.csv"
    )
    with open(out_path, "a+", newline="") as file:
        csv.writer(file, delimiter=",").writerows(df_py_list)

    print("Wrote and annotated " + str("df"))
    return df_py_list

NameError: name 'r' is not defined

In [None]:
def activ_go_writer(software,df_py,species,specific):
    """Annotate every row of ``df_py`` with its ancestor path and append the paths to a CSV.

    For each row, ``go_writer`` is called with the value of the second column
    as ``go_id`` and the value of the first column as ``go_term``; the returned
    path is reversed so that the term itself comes first. The row for
    "cell cycle checkpoint" / "GO:0000075" is skipped.

    Parameters
    ----------
    software, species, specific
        Labels used only to build the output file name.
    df_py
        Table with an "ID" column; column 0 holds the term name and column 1
        the GO ID (both are read positionally).

    Returns
    -------
    list
        One reversed path (list of term names) per processed row.
    """
    df_py_list = []
    for i in range(len(df_py["ID"])):
        # `and`, not `&`: `&` binds tighter than `==`, so the previous
        # expression evaluated `"cell cycle checkpoint" & <GO ID>` and failed
        # with a TypeError instead of comparing both columns.
        if df_py.iloc[i,0] == "cell cycle checkpoint" and df_py.iloc[i,1] == "GO:0000075":
            continue
        a = go_writer(
            go_id = df_py.iloc[i,1],
            go_term = df_py.iloc[i,0])
        a.reverse()
        df_py_list.append(a)

    # Append mode: repeated calls add rows to an existing file.
    out_path = ("/home/marius/Documents/Projects/prsa/Outputs/" +
                str(species) + "/clustered_heatmap/" +
                str(software) + "/paths_df_" +
                str(software) + "_" +
                str(species) + "_go_bp_" +
                str(specific) + "_MGI.csv")
    # Opening inside the `with` statement guarantees the handle is closed even
    # if writing fails.
    with open(out_path, "a+", newline = "") as file:
        write = csv.writer(file,delimiter=",")
        write.writerows(df_py_list)
    print("Wrote and annotated " + str("df"))
    return(df_py_list)

### Refactor the function to use only the GO:ID, making it less susceptible to changes between data sets.

In [27]:
## Parse the rendered ancestor tree into [ID, Process] pairs: strip the
## surrounding spaces, then the "%" and "." markers, and cut at "!".
tree = [
    line.strip(" ").strip("%").strip(".").split("!")
    for line in renderer.render_subgraph(ont, nodes=ancestors).split("\n")
]

tree_df = pd.DataFrame(tree, columns=["ID", "Process"])
tree_df["Process"] = tree_df["Process"].str.lstrip()
tree_df["ID"] = tree_df["ID"].str.strip(" ")

## Keep only the rows from the top of the tree down to the first row whose ID is term_id
tree_df = tree_df.iloc[: tree_df["ID"].tolist().index(term_id) + 1]
tree_df


Unnamed: 0,ID,Process
0,GO:0008150,biological_process
1,GO:0009987,cellular process
2,GO:0071840,cellular component organization or biogenesis
3,GO:0016043,cellular component organization
4,GO:0030030,cell projection organization
5,GO:0120036,plasma membrane bounded cell projection organi...
6,GO:0097581,lamellipodium organization
7,GO:0072673,lamellipodium morphogenesis


In [22]:
def go_writer(go_id, go_term=None, relations=None):
    """Return the term names from the top of the ancestor tree down to ``go_id``.

    The ancestors of ``go_id`` (the term itself included) are rendered as a
    text tree. The names on the rendered lines, from the first line down to and
    including the first line whose identifier equals ``go_id``, are returned in
    rendering order (root first).

    Parameters
    ----------
    go_id : str
        GO identifier of the term, e.g. "GO:0042407".
    go_term : str, optional
        Ignored. Accepted only so that callers written for the earlier
        ``go_writer(go_id, go_term)`` signature keep working.
    relations : list of str, optional
        Forwarded unchanged to ``ont.ancestors`` to restrict the relations that
        are followed (e.g. ``['subClassOf']``). The default ``None`` applies no
        restriction.

    Returns
    -------
    list of str
        Term names, root first, ending with the name of ``go_id``.

    Raises
    ------
    ValueError
        If ``go_id`` does not occur in the rendered tree.
    """
    ancestors = ont.ancestors(go_id, relations=relations, reflexive=True)
    renderer = GraphRenderer.create('tree')
    rendered = renderer.render_subgraph(ont, nodes=ancestors)

    path = []
    for line in rendered.split("\n"):
        # Blank or malformed lines carry no "ID ! name" pair.
        if "!" not in line:
            continue
        # Split once only, so a "!" inside a term name stays in the name.
        id_field, process = line.split("!", 1)
        path.append(process.strip())
        # The ID field is "<marker> <GO ID>" for is_a edges and
        # "<relation ID> <GO ID>" for other edges; the GO ID is always the last
        # token, so compare against that instead of the whole field.
        tokens = id_field.split()
        if tokens and tokens[-1] == go_id:
            return path
    raise ValueError("GO ID " + repr(go_id) + " was not found in the rendered ancestor tree")

In [23]:
## Example: path for GO:0042407, reversed so that the term itself comes first
a = go_writer(go_id="GO:0042407")
a.reverse()
lists_data = [a]
lists_data

[['cristae formation',
  'inner mitochondrial membrane organization',
  'mitochondrial membrane organization',
  'mitochondrion',
  'intracellular membrane-bounded organelle',
  'intracellular organelle',
  'organelle',
  'cellular anatomical entity',
  'cellular_component']]