In [None]:
from rdflib import Graph, URIRef, Literal, Namespace
import pandas as pd

In [None]:
fi = '../00_shared_data/Tadirah/tadirah.ttl'

# Load the Turtle file into an in-memory RDF graph.
g = Graph()
g.parse(fi, format='turtle')

# Flatten every triple into one [subject, predicate, object] row of short names.
data = []
for s, p, o in g.triples((None, None, None)):
    # Keep only the last path segment of the subject IRI.
    s = s.split('/')[-1]
    # Predicates may be slash- or hash-namespaced, so strip both.
    p = p.split('/')[-1].split('#')[-1]
    # Only IRIs are shortened. Any other term (e.g. a literal label or
    # definition) is kept verbatim, because splitting it on '/' would silently
    # truncate text that merely happens to contain a slash.
    # NOTE(review): the later cells read only 'narrower' / 'hasTopConcept'
    # rows; their objects are presumably IRIs and therefore unchanged - confirm.
    if isinstance(o, URIRef):
        o = o.split('/')[-1]
    else:
        o = str(o)
    data.append([s, p, o])

df = pd.DataFrame(data, columns=['subject', 'predicate', 'object'])
df.head()

In [None]:
# Column names used by the edge list: subject -> Source, object -> Target.
edge_columns = {'subject': 'Source', 'object': 'Target'}

# Every 'narrower' triple becomes one Source -> Target edge.
is_narrower = df['predicate'] == 'narrower'
edges = df.loc[is_narrower, ['subject', 'object']].rename(columns=edge_columns)

# 'hasTopConcept' triples are attached to a single artificial root named 'Tadirah',
# whatever their original subject was.
is_top_concept = df['predicate'] == 'hasTopConcept'
top_edges = (
    df.loc[is_top_concept, ['subject', 'object']]
    .rename(columns=edge_columns)
    .assign(Source='Tadirah')
)

# Stack both edge sets; the row index inherited from df is kept as-is.
edges = pd.concat([edges, top_edges])
edges.head()

In [None]:
# Add weights to the edges.
#
# Walk the hierarchy level by level, starting at the 'Tadirah' root: edges
# leaving the root get weight 100, and the weight is halved (integer division)
# for every further level. An edge whose source is reached on several levels
# keeps the weight of the last (deepest) level that touched it; edges never
# reached keep the initial weight 0.
weight = 100
sources = ['Tadirah']
edges['Weight'] = 0

# A path in an acyclic hierarchy with E edges visits at most E + 1 nodes, so
# the walk needs at most E + 1 levels. Bounding the loop keeps results
# identical for acyclic data while guaranteeing termination if the 'narrower'
# relation contains a cycle (the frontier would otherwise never become empty).
max_levels = len(edges) + 1
for _ in range(max_levels):
    print(weight)  # progress trace: the weight assigned on this level
    from_current_level = edges['Source'].isin(sources)
    edges.loc[from_current_level, 'Weight'] = weight
    # The targets of this level are the sources of the next one.
    sources = edges.loc[from_current_level, 'Target'].unique()
    weight = weight // 2
    if len(sources) == 0:
        break
else:
    # The loop ran out of levels without exhausting the frontier.
    print('Warning: the hierarchy contains a cycle; stopped weighting after',
          max_levels, 'levels')


In [None]:
# Inspect the edges that leave the artificial root node.
sources = ['Tadirah']
starts_at_root = edges['Source'].isin(sources)
edges[starts_at_root]

In [None]:
# Show the edges ordered from the heaviest to the lightest weight.
edges.sort_values(by='Weight', ascending=False)

In [None]:
# Build the node table from both ends of every edge.
# A source node carries the weight of its outgoing edge.
nodes_left = edges[['Source', 'Weight']].rename(columns={'Source': 'Id'})

# A target node gets half of the incoming edge weight (integer halving).
nodes_right = (
    edges[['Target', 'Weight']]
    .rename(columns={'Target': 'Id'})
    .assign(Weight=lambda frame: frame['Weight'] // 2)
)

# A node usually appears several times; after sorting by descending weight,
# keeping the first row per Id retains its highest weight.
nodes = (
    pd.concat([nodes_left, nodes_right])
    .sort_values('Weight', ascending=False)
    .drop_duplicates('Id')
)

# The label is simply the identifier; fix the final column order.
nodes = nodes.assign(Label=nodes['Id'])[['Id', 'Label', 'Weight']]

In [None]:
# Write the edge and node tables as tab-separated files without the row index.
for frame, target_path in (
    (edges, '../00_shared_data/Tadirah/tadirah_edges.csv'),
    (nodes, '../00_shared_data/Tadirah/tadirah_nodes.csv'),
):
    frame.to_csv(target_path, sep='\t', index=False)

In [None]:
# Create a graphviz visualization of the edges.
from graphviz import Digraph

dot = Digraph(comment='Tadirah')

# One directed edge per row of the edge table, in table order.
for source, target in zip(edges['Source'], edges['Target']):
    dot.edge(source, target)

# Graph attributes (left-to-right ranks, size '10') are applied one at a time,
# then the graph is rendered to PNG.
for option in ({'rankdir': 'LR'}, {'size': '10'}):
    dot.attr(**option)
dot.render('../00_shared_data/Tadirah/tadirah', format='png')

# Leaving the object as the last expression shows the graph in the notebook.
dot



In [None]:
# Render the same graph a second time, now with the sfdp layout engine.
# Note that this reconfigures the existing `dot` object in place.
dot.engine = 'sfdp'

# Attributes are added one per call, in this order.
for name, value in (('rankdir', 'LR'), ('size', '10'), ('overlap', 'false')):
    dot.attr(**{name: value})

dot.render('../00_shared_data/Tadirah/tadirah_graph_sfdp', format='png')

# Display the re-laid-out graph in the notebook.
dot

