In [None]:
import os
import yaml
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
from IPython.display import HTML, display
import matplotlib.pyplot as plt
import plotly.io as pio
# Make "notebook" the default plotly renderer used by every fig.show() below.
pio.renderers.default = "notebook"

# Dashboard results file, resolved relative to the current working directory.
data_path = os.path.join('dashboard','dashboard-results.yml')
with open(data_path, 'r') as stream:
    try:
        # `data` is the parsed YAML document; later cells read data['ontologies'].
        data = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Every later cell needs `data`. Swallowing the parse error here would
        # only resurface further down as an unrelated NameError, so report the
        # problem and stop at its real cause.
        print(exc)
        raise

In [None]:
def _metric_values(*path, default=0):
    """Collect one value per ontology, in the order of data['ontologies'].

    `path` is the chain of keys looked up beneath each ontology's 'metrics'
    entry. Ontologies without a 'metrics' entry contribute `default`; a key
    missing below an existing 'metrics' entry still raises KeyError.
    """
    collected = []
    for onto in data['ontologies']:
        if 'metrics' not in onto:
            collected.append(default)
            continue
        value = onto['metrics']
        for key in path:
            value = value[key]
        collected.append(value)
    return collected


# Key under 'metrics' that holds the nested score components.
_OBO_SCORE = 'Info: Experimental OBO score'

m_axioms = _metric_values('Axioms: Number of axioms')
m_uses = _metric_values('Info: How many ontologies use it?')
m_onts = [onto['namespace'] for onto in data['ontologies']]
m_classes = _metric_values('Entities: Number of classes')
m_entities_reuse = _metric_values('Entities: % of entities reused')
m_syntax = _metric_values('Info: Syntax', default="unknown")
m_score = _metric_values(_OBO_SCORE, 'oboscore')
m_score_dash = _metric_values(_OBO_SCORE, '_dashboard')
m_score_reuse = _metric_values(_OBO_SCORE, '_reuse')
m_score_impact = _metric_values(_OBO_SCORE, '_impact')



# One row per ontology; the column order below is the order the frame is built in.
df = pd.DataFrame(dict(
    ontology=m_onts,
    axioms=m_axioms,
    classes=m_classes,
    entities_reused=m_entities_reuse,
    uses=m_uses,
    score=m_score,
    score_dash=m_score_dash,
    score_reuse=m_score_reuse,
    score_impact=m_score_impact,
    syntax=m_syntax,
))

# Flattened view of every ontology record; nested keys become dot-joined
# column names (e.g. 'metrics.<key>'), which table_breakdown filters on.
df_all = pd.json_normalize(data['ontologies'])

#Info: Breakdown of OWL class expressions used
#results
def plot_bar(df, feature, logx=True):
    """Show a horizontal bar chart of one column, one bar per row of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'ontology' column (used for the y axis) and the
        column named by `feature`. The frame is not modified.
    feature : str
        Column plotted on the x axis; rows are ordered by it, ascending.
    logx : bool, default True
        Forwarded to plotly express as `log_x`.

    Returns
    -------
    None
        The figure is displayed with `fig.show()`.
    """
    # Sort into a new frame. Sorting in place reordered the caller's DataFrame
    # on every call, so later cells saw a row order that depended on which
    # plots had been drawn before them.
    ordered = df.sort_values(by=feature)
    # 300px base plus 10px per row, so the figure grows with the row count.
    height = 300 + len(ordered) * 10
    fig = px.bar(
        ordered,
        y="ontology",
        x=feature,
        orientation='h',
        width=800,
        height=height,
        log_x=logx,
    )
    # tickmode='linear' is kept from the original; presumably it forces a tick
    # label for every ontology rather than letting plotly thin them out.
    fig.update_layout(
        yaxis=dict(
            title='ontology',
            tickmode='linear')
    )

    fig.show()

# OBO Foundry dashboard analysis

In [None]:
# Navigation link back to the dashboard main page.
back_link_html = '<a href="index.html">Back to dashboard main page</a>'
display(HTML(back_link_html))

In [None]:
def table_breakdown(df_all, col_prefix):
    """Summarise every column of `df_all` whose name starts with `col_prefix`.

    Missing values are treated as 0 before summarising. The result is the
    transposed `describe()` table (one row per matching column, with the
    prefix removed from its name), except that 'count' is replaced by the
    number of truthy (non-zero) entries in that column. Rows are ordered by
    that count, highest first. `df_all` itself is not modified.
    """
    matching = [name for name in df_all if name.startswith(col_prefix)]
    breakdown = df_all[matching].copy()
    # Carry the ontology namespace along under the short name 'o'.
    breakdown['o'] = df_all['namespace']
    breakdown.columns = [name.replace(col_prefix, "") for name in breakdown]
    breakdown = breakdown.fillna(0)

    summary = breakdown.describe().T
    # Assignment aligns on the index, so only rows already present in the
    # describe() output receive a value.
    summary['count'] = breakdown.astype(bool).sum(axis=0)
    return summary.sort_values(by='count', ascending=False)



In [None]:
# One [ontology, check, status] row for every result whose key starts with
# "FP"; ontologies without a 'results' entry are skipped.
dashboard_results = [
    [o['namespace'], check, outcome['status']]
    for o in data['ontologies']
    if 'results' in o
    for check, outcome in o['results'].items()
    if check.startswith("FP")
]
df_results = pd.DataFrame(dashboard_results, columns=['ontology', 'check', 'status'])

# Number of ontologies per (check, status) pair, ordered by check name descending.
df_results_agg = (
    df_results
    .groupby(['check', 'status'])['ontology']
    .count()
    .reset_index(name='count')
    .sort_values('check', ascending=False)
)

# Size the chart by what it actually shows: one bar per check. The previous
# formula used len(df), the per-ontology frame built in another cell, which
# made the height depend on the number of ontologies instead.
height = 300 + df_results_agg['check'].nunique() * 10
fig = px.bar(
    df_results_agg,
    y="check",
    x='count',
    color='status',
    orientation='h',
    width=800,
    height=height,
)
fig.update_layout(
    yaxis=dict(
        title='check',
        tickmode='linear')
)

fig.show()

In [None]:
# Section heading, then the per-ontology axiom counts as a horizontal bar chart.
display(HTML('<h3>Ontologies by number of axioms</h3>'))
plot_bar(df, feature="axioms")

In [None]:
# Section heading, then the per-ontology class counts as a horizontal bar chart.
display(HTML('<h3>Ontologies by number of classes</h3>'))
plot_bar(df, feature="classes")

In [None]:
# Section heading, then the "how many ontologies use it" metric per ontology.
display(HTML('<h3>Ontologies by how many ontologies use it</h3>'))
plot_bar(df, feature="uses")

In [None]:
# Number of ontologies per serialisation syntax. The bare expression on the
# last line is what the cell renders as its output.
display(HTML('<h3>Different serialisations used</h3>'))
df.loc[:, 'syntax'].value_counts()

In [None]:
# (heading HTML, df_all column prefix) for each breakdown table, in display order.
for section_heading, column_prefix in (
    ('<h3>Breakdown of used Axiom Types</h3>',
     'metrics.Axioms: Breakdown of axiom types.'),
    ('<h3>Breakdown of used OWL Class Expression constructs</h3>',
     'metrics.Info: Breakdown of OWL class expressions used.'),
):
    display(HTML(section_heading))
    display(HTML(table_breakdown(df_all, column_prefix).to_html()))


In [None]:

# Score table with one row per ontology, highest overall score first.
score_columns = ['ontology', 'score', 'score_dash', 'score_reuse', 'score_impact']
df_score = df[score_columns].sort_values('score', ascending=False)

display(HTML('<h3>OBO Score (Experimental)</h3>'))
display(df_score)
# Summary statistics, transposed so each score component is a row.
display(HTML('<h4>OBO Score Summary</h4>'))
display(df_score.describe().T)

In [None]:
display(HTML('<h3>OBO dependency graph</h3><p>yellow circles have less than 20, green more than 20 but less than 50 and blue circles more than 50 registered uses.</p>'))

# Fixed seed for the random node placement, so that re-running the notebook
# reproduces the same figure (the unseeded layout changed on every run).
LAYOUT_SEED = 42

# namespace -> value of 'Info: Which ontologies use it?', for every ontology
# that reports that metric.
dependencies = {
    o['namespace']: o['metrics']['Info: Which ontologies use it?']
    for o in data['ontologies']
    if 'metrics' in o and 'Info: Which ontologies use it?' in o['metrics']
}

# Directed edge "user -> used ontology". Ontologies with at most one listed
# user contribute no edges.
G = nx.DiGraph()
for used, users in dependencies.items():
    if len(users) > 1:
        G.add_edges_from((user, used) for user in users)

# Node -> degree, in G's node order; the size and colour lists below rely on
# that order matching the order nx.draw uses for the nodes.
d = dict(G.degree)

plt.figure(figsize=(12,12))
# Nodes with degree <= 2 get a small fixed size; the rest scale with degree.
node_size = [v * 50 if v > 2 else 5 for v in d.values()]
# NOTE(review): colours follow the node's degree in G (incoming plus outgoing
# edges), which can differ from the "registered uses" named in the heading.
color_map = [
    "yellow" if v < 20 else "green" if v < 50 else "blue"
    for v in d.values()
]
pos = nx.random_layout(G, seed=LAYOUT_SEED)
nx.draw(G, pos=pos, with_labels=True, node_size=node_size, node_color=color_map)
plt.show()