Represent CO–TO mappings (Crop Ontology classes mapped to Plant Trait Ontology classes) using hive plots: http://egweb.bcgsc.ca/

In [1]:
# import libraries (including our short d3_lib script)
from IPython.core.display import HTML
import d3_lib
import json
from collections import OrderedDict


### get the TO classes ordered by the category type

In [2]:
%%script bash
# Run the `arq` command-line tool (presumably Apache Jena's SPARQL processor — confirm it is on PATH)
# against the remote Plant Trait Ontology OWL file and save the JSON result set to data/to.json.
# Each row holds a labelled class ?x, one labelled superclass ?o, and ?topCat: ?o itself or one of
# its ancestors that is a direct subclass of TO_0000387. Rows are ordered by ?topCat, then ?xlabel.
# NOTE(review): the two FILTERs sit inside the subquery, which only projects ?x and ?xlabel, so they
# constrain the subquery's own ?o and do not restrict the outer ?o — confirm this is intended.
arq --results JSON --data https://raw.githubusercontent.com/Planteome/plant-trait-ontology/master/plant-trait-ontology.obo.owl '
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>  
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>  
PREFIX owl: <http://www.w3.org/2002/07/owl#> 
PREFIX oio: <http://www.geneontology.org/formats/oboInOwl#>  
PREFIX obo: <http://purl.obolibrary.org/obo/>  

SELECT distinct ?x ?xlabel ?o ?olabel ?topCat
WHERE {
    { select ?x ?xlabel
        where {
            ?x rdfs:subClassOf ?o . 
            ?x rdfs:label ?xlabel . 
            FILTER (!isBlank(?o))
            FILTER (!regex(str(?o), "PATO_"))
            }            
    }
    ?x rdfs:subClassOf ?o .
    ?o rdfs:label ?olabel .
    ?o rdfs:subClassOf* ?topCat .
    ?topCat rdfs:subClassOf <http://purl.obolibrary.org/obo/TO_0000387> .
}
order by ?topCat ?xlabel
' > data/to.json

# prepare the data for the hive plot

In [3]:
# Container for the hive-plot data: both lists start empty and are filled in by the cells below.
# The later cells append to these lists, so re-run this cell before re-running them.
graph = {"nodes": [], "links": []}

### TO classes

In [4]:
# Load the SPARQL result set written to data/to.json; the file is only needed while parsing.
with open('data/to.json') as data_file:
    data = json.load(data_file)

# Named `bindings` rather than `list` so the built-in `list` type is not shadowed
# for every cell that runs afterwards in the same kernel.
bindings = data["results"]["bindings"]

# One node per TO class, keyed by the class IRI. Keys keep the order in which they are first
# seen; when a class appears in several result rows, the values of its last row are kept.
temp = OrderedDict()
for entry in bindings:
    class_iri = entry["x"]["value"]
    temp[class_iri] = {
        "x": 0,  # every TO node gets x = 0 (the CO nodes use 1 and 2)
        "name": entry["xlabel"]["value"],
        "id": class_iri,
        "type": entry["topCat"]["value"],
    }

print(len(temp))
          

1371


### CO classes

In [5]:
# "x" value assigned to a CO node, chosen by the ontology prefix found in its id.
CO_PREFIXES_BY_AXIS = (
    (1, ("CO_341", "CO_336", "CO_339", "CO_320")),
    (2, ("CO_322", "CO_321", "CO_334", "CO_331")),
)


def co_axis(class_id):
    """Return the "x" value for a CO class id, or None if no known prefix occurs in it.

    The prefixes are matched as substrings of ``class_id``; the first matching
    group in ``CO_PREFIXES_BY_AXIS`` wins.
    """
    for axis, prefixes in CO_PREFIXES_BY_AXIS:
        if any(prefix in class_id for prefix in prefixes):
            return axis
    return None


with open('data/co.json') as co_file:
    co = json.load(co_file)

# Next position on each axis; a node's "y" is this counter divided by 100 (0.01, 0.02, ...).
next_position = {1: 1, 2: 1}
# Ids of the CO classes added as nodes; a later cell uses them to keep only the
# TO classes that have a mapping to CO.
tab = []

# NOTE: this appends to graph["nodes"], so re-running the cell without first
# re-creating `graph` adds the same nodes again.
for entry in co:
    # These two fields are not wanted on the nodes; entries that lack them are tolerated.
    entry.pop('group', None)
    entry.pop('value', None)

    axis = co_axis(entry["id"])
    if axis is None:
        # Not one of the ontologies selected above: leave it out of the plot.
        continue

    entry["x"] = axis
    entry["y"] = next_position[axis] / 100
    next_position[axis] += 1
    graph["nodes"].append(entry)
    tab.append(entry["id"])

print(len(graph["nodes"]))

1145


In [6]:
### keep only the TO classes that are the target of a mapping from one of the CO nodes
with open('data/COTOMappings.json') as mapping_file:
    coto = json.load(mapping_file)

# Sets make each membership test constant-time instead of a scan over a list.
co_ids = set(tab)
# Ids ("o") of the mapping targets whose source ("x") is one of the CO nodes added earlier.
tab1 = {cl["o"] for cl in coto if cl["x"] in co_ids}

# Position on the axis: "y" is this counter divided by 100, assigned in temp's key order.
y = 1
for e, v in temp.items():
    if e in tab1:
        v["y"] = y / 100
        y += 1
        graph["nodes"].append(v)

print(len(graph["nodes"]))

1529


### add mappings

In [7]:
with open('data/COTOMappings.json') as data_file:
    mapping = json.load(data_file)

# Index the nodes by id once instead of scanning every node for every mapping.
# If an id occurs more than once, the last node with that id is used.
nodes_by_id = {node["id"]: node for node in graph["nodes"]}

# Create one link per mapping whose two ends are both present as nodes.
for entry in mapping:
    # A mapping from an id to itself is never turned into a link.
    if entry["x"] == entry["o"]:
        continue
    x = nodes_by_id.get(entry["x"])
    o = nodes_by_id.get(entry["o"])
    if x is not None and o is not None:
        # The link stores the node dicts themselves, not just their ids.
        graph["links"].append({"source": x, "target": o})

print(len(graph["links"]))

1123


In [9]:
# Serialise the assembled nodes and links to data/hive.json.
with open('data/hive.json', 'w') as hive_file:
    hive_file.write(json.dumps(graph))

### hive plot

In [10]:
# visualize as force-directed graph in D3
# Render the graph as a D3 hive plot: the styles first, then the two D3 scripts,
# then the markup produced by d3_lib for the 'hive' template.
hive_styles = d3_lib.set_styles(['hive'])
d3_scripts = ('<script src="lib/d3/d3.min.js"></script>' +
              '<script src="lib/d3.hive.min.js"></script>')
hive_markup = d3_lib.draw_graph('hive', {'data': graph})
HTML(hive_styles + d3_scripts + hive_markup)

In [8]:
# Display the assembled graph for inspection. Each link holds its source and target
# node dicts in full, so this output is long.
graph

{'links': [{'source': {'id': 'CO_341:0000057',
    'name': 'Sterility mosaic incidence',
    'x': 1,
    'y': 0.01},
   'target': {'id': 'http://purl.obolibrary.org/obo/TO_0000148',
    'name': 'viral disease resistance',
    'type': 'http://purl.obolibrary.org/obo/TO_0000164',
    'x': 0,
    'y': 2.82}},
  {'source': {'id': 'CO_341:0000081', 'name': 'Stem color', 'x': 1, 'y': 0.02},
   'target': {'id': 'http://purl.obolibrary.org/obo/TO_0000056',
    'name': 'stem color',
    'type': 'http://purl.obolibrary.org/obo/TO_0000017',
    'x': 0,
    'y': 1.7}},
  {'source': {'id': 'CO_341:0000151',
    'name': 'Soil salinity stress susceptibility',
    'x': 1,
    'y': 0.03},
   'target': {'id': 'http://purl.obolibrary.org/obo/TO_0000168',
    'name': 'abiotic stress trait',
    'type': 'http://purl.obolibrary.org/obo/TO_0000164',
    'x': 0,
    'y': 2.08}},
  {'source': {'id': 'CO_341:0000152',
    'name': 'Soil acidity stress susceptibility',
    'x': 1,
    'y': 0.04},
   'target': {'i