In [None]:
import os
import yaml
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
from IPython.display import HTML, display
#import matplotlib.pyplot as plt
import plotly.io as pio
pio.renderers.default = "notebook"

# Load the dashboard results YAML that every later cell reads from `data`.
data_path = os.path.join('dashboard','dashboard-results.yml')
# YAML documents are UTF-8 by convention; do not depend on the platform default encoding.
with open(data_path, 'r', encoding='utf-8') as stream:
    try:
        data = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Without `data` no later cell can run, so report the parse error and
        # stop here instead of failing later with an unrelated NameError.
        print(exc)
        raise

In [None]:
def extract_number(data,metric,submetric=None):
    """Collect one metric value per ontology, in the order of data['ontologies'].

    Parameters:
        data: mapping with an 'ontologies' list of per-ontology dicts.
        metric: key looked up in each ontology's 'metrics' mapping.
        submetric: optional key looked up inside the metric's own mapping.

    Returns:
        A list with one entry per ontology: the metric value (or the
        submetric value when `submetric` is given), or 0 when the ontology
        has no 'metrics', lacks the metric, or lacks the submetric.
    """
    values = []
    for ontology in data['ontologies']:
        value = 0  # fallback for anything that is missing
        if 'metrics' in ontology and metric in ontology['metrics']:
            entry = ontology['metrics'][metric]
            if not submetric:
                value = entry
            elif submetric in entry:
                value = entry[submetric]
        values.append(value)
    return values


# Per-ontology identifiers and serialisation syntax ("unknown" when the
# ontology has no 'Info: Syntax' metric).
ontology_records = data['ontologies']
m_onts = [record['namespace'] for record in ontology_records]
m_syntax = [
    record['metrics']['Info: Syntax']
    if 'metrics' in record and 'Info: Syntax' in record['metrics']
    else "unknown"
    for record in ontology_records
]

# One row per ontology; numeric columns fall back to 0 when a metric is absent
# (see extract_number).
obo_score_metric = 'Info: Experimental OBO score'
summary_columns = {
    'ontology': m_onts,
    'axioms': extract_number(data, 'Axioms: Number of axioms'),
    'classes': extract_number(data, 'Entities: Number of classes'),
    'entities_reused': extract_number(data, 'Entities: % of entities reused'),
    'uses': extract_number(data, 'Info: How many ontologies use it?'),
    'score': extract_number(data, obo_score_metric, 'oboscore'),
    'score_dash': extract_number(data, obo_score_metric, '_dashboard'),
    # 'score_reuse': extract_number(data, obo_score_metric, '_reuse'),
    'score_impact': extract_number(data, obo_score_metric, '_impact'),
    # 'score_impact_external': extract_number(data, obo_score_metric, '_impact_external'),
    'syntax': m_syntax,
}
df = pd.DataFrame(summary_columns)

# Flattened view of every ontology record; nested keys become dot-separated column names.
df_all = pd.json_normalize(ontology_records)

#Info: Breakdown of OWL class expressions used
#results
def plot_bar(df, feature, logx=True):
    """Show a horizontal bar chart of `feature` per ontology.

    Parameters:
        df: DataFrame with an 'ontology' column and a `feature` column.
        feature: name of the column plotted on the x axis; bars are ordered by it.
        logx: when True (default) the x axis is logarithmic.

    The figure is displayed with fig.show(); nothing is returned.
    """
    # Sort a copy: the caller's frame is shared by other cells and must not be
    # reordered as a side effect of plotting.
    df_sorted = df.sort_values(by=feature)
    # Grow the figure with the number of rows so every ontology label fits.
    height = 300 + (len(df_sorted) * 10)
    fig = px.bar(df_sorted, y="ontology", x=feature, orientation='h', width=800, height=height, log_x=logx)
    fig.update_layout(
        yaxis=dict(
            title='ontology',
            tickmode='linear')  # one tick per bar
    )

    fig.show()

# OBO Foundry dashboard analysis

In [None]:
# Render a link back to index.html at the top of the report.
display(HTML('<a href="index.html">Back to dashboard main page</a>'))

In [None]:
def table_breakdown(df_all, col_prefix):
    """Summarise the columns of `df_all` whose names start with `col_prefix`.

    Parameters:
        df_all: DataFrame with a 'namespace' column plus the columns to summarise.
        col_prefix: prefix selecting the columns; it is stripped from their names.

    Returns:
        The transposed describe() summary of the selected columns (missing
        values counted as 0), where 'count' is replaced by the number of rows
        with a truthy (non-zero) value, sorted by that count in descending order.
    """
    selected = [name for name in df_all if name.startswith(col_prefix)]
    breakdown = df_all[selected].copy()
    breakdown['o'] = df_all['namespace']
    breakdown.columns = [name.replace(col_prefix, "") for name in breakdown]
    breakdown = breakdown.fillna(0)

    summary = breakdown.describe().T
    # Overwrite describe()'s row count with the number of non-zero entries per column.
    summary['count'] = breakdown.astype(bool).sum(axis=0)
    return summary.sort_values(by='count', ascending=False)

In [None]:
# Collect the status of every check whose key starts with "FP", for each
# ontology that reports results: one [ontology, check, status] row per check.
dashboard_results = []
for o in data['ontologies']:
    if 'results' in o:
        for res in o['results']:
            if res.startswith("FP"):
                dashboard_results.append([o['namespace'], res, o['results'][res]['status']])
df_results = pd.DataFrame(dashboard_results,columns=['ontology', 'check', 'status'])

# Number of ontologies per (check, status) pair, as flat columns check/status/count.
df_results_agg = (
    df_results
    .groupby(['check', 'status'])['ontology']
    .count()
    .reset_index(name='count')
)

# Category order used for sorting the stacked bars.
errcats = ['ERROR', 'WARN', 'INFO', 'PASS']

# Wide table (one column per status). reindex guarantees all four status
# columns exist even when a status never occurs in the data, so the melt
# below cannot fail on a missing column.
dfx = (
    df_results_agg
    .pivot(index='check', columns='status', values='count')
    .reindex(columns=errcats)
    .fillna(0)
    .reset_index()
)
dfx = pd.melt(dfx, id_vars='check', value_vars=errcats, var_name='status')
dfx['value'] = dfx['value'].astype(int)
# Build the categorical with explicit categories instead of
# cat.reorder_categories(..., inplace=True): the `inplace` keyword was removed
# in pandas 2.0, and reorder_categories also fails when a status is absent.
dfx['status'] = pd.Categorical(dfx['status'], categories=errcats)
dfx = dfx.sort_values(['check','status'], ascending=False)

height = 300+(len(df_results_agg)*10)
color_map_errors={'PASS': '#c3e6cb', 'INFO': '#bee5eb', 'WARN': '#ffeeba', 'ERROR': '#f5c6cb'}
fig = px.bar(dfx, y="check", x='value', labels={"check": "OBO Principle", "value": "Number of ontologies"}, color='status', orientation='h', width=800,height=height, color_discrete_map=color_map_errors)
fig.show()

In [None]:
# Bar chart of the 'axioms' column per ontology (plot_bar uses a log x axis by default).
display(HTML('<h3>Ontologies by number of axioms</h3>'))
plot_bar(df, "axioms")

In [None]:
# Bar chart of the 'classes' column per ontology (plot_bar uses a log x axis by default).
display(HTML('<h3>Ontologies by number of classes</h3>'))
plot_bar(df, "classes")

In [None]:
# Bar chart of the 'uses' column per ontology (plot_bar uses a log x axis by default).
display(HTML('<h3>Ontologies by how many ontologies use it</h3>'))
plot_bar(df, "uses")

In [None]:
# Count how many ontologies report each value of the 'syntax' column; the
# value_counts() result is the cell's last expression and therefore its output.
display(HTML('<h3>Different serialisations used</h3>'))
df['syntax'].value_counts()

In [None]:
# Each table summarises the df_all columns that start with the given prefix
# (see table_breakdown) and is rendered as HTML.
display(HTML('<h3>Breakdown of used Axiom Types</h3>'))
display(HTML(table_breakdown(df_all,'metrics.Axioms: Breakdown of axiom types.').to_html()))

display(HTML('<h3>Breakdown of used OWL Class Expression constructs</h3>'))
display(HTML(table_breakdown(df_all,'metrics.Info: Breakdown of OWL class expressions used.').to_html()))


In [None]:
# Columns shown in the score table; 'score_reuse' and 'score_impact_external'
# are currently left out.
score_columns = ['ontology','score','score_dash','score_impact']
# Highest overall score first; sort_values returns a new frame, so df is untouched.
df_score = df[score_columns].sort_values('score', ascending=False)

# Full per-ontology table followed by summary statistics of the score columns.
display(HTML('<h3>OBO Score (Experimental)</h3>'))
display(HTML(df_score.to_html()))
display(HTML('<h4>OBO Score Summary</h4>'))
display(HTML(df_score.describe().T.to_html()))

In [None]:

# Map each ontology namespace to the value of its
# 'Info: Which ontologies use it?' metric, skipping ontologies without it.
dependencies = {}
for o in data['ontologies']:
    if 'metrics' not in o or 'Info: Which ontologies use it?' not in o['metrics']:
        continue
    dependencies[o['namespace']] = o['metrics']['Info: Which ontologies use it?']

# Directed graph with an edge from an ontology to each entry of its list;
# ontologies whose list has at most one entry contribute no edges.
G = nx.DiGraph()
for o, users in dependencies.items():
    if len(users) <= 1:
        continue
    for dep in users:
        G.add_edge(o, dep)

#nx.draw(G, with_labels=True, font_weight='bold')
#d = dict(G.degree)


#plt.figure(figsize=(12,12)) 
#node_size = [v * 50 if v>2 else 5 for v in d.values()]
#color_map = []
#for v in d.values():
#    if v<20:
#        color_map.append("yellow")
#    elif v<50:
#        color_map.append("green")
#    else:
#        color_map.append("blue")
#nx.draw_random(G, with_labels=True, node_size=node_size, node_color=color_map)
#plt.show()

In [None]:
display(HTML('<h3>OBO dependency graph</h3>'))

Gr = G
# 2-D coordinates for every node.
pos = nx.kamada_kawai_layout(Gr)

# All edges go into one line trace; a None entry after each segment breaks the
# line so that consecutive edges are not joined.
edge_x = []
edge_y = []
for edge in Gr.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

node_x = []
node_y = []
node_count = len(Gr.nodes())
for node in Gr.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)

# Per-node colour value, marker size and hover text, derived from the number
# of outgoing edges of each node (the entries of its adjacency mapping).
node_adjacencies = []
node_sizes = []
node_text = []
for name, neighbours in Gr.adjacency():
    uses = len(neighbours)
    node_adjacencies.append(uses)
    node_text.append(f'{name} ({uses} uses)')
    # Scale the marker with the share of nodes it links to, with a minimum size of 10.
    node_sizes.append((20*uses)/node_count+10)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_text,
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=node_adjacencies,
        size=node_sizes,
        colorbar=dict(
            thickness=15,
            # The nested title form replaces the legacy `titleside` attribute,
            # which Plotly 6 no longer accepts.
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )
fig.show()