In [9]:
from django_for_jupyter import init_django
init_django("arches")

import json
import pprint 
import uuid
import pandas as pd
from lxml import etree
from collections import defaultdict
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt
from urllib.parse import urlparse

from arches.app.models.graph import Graph
from arches.app.models.concept import Concept
from arches.app.models.models import CardXNodeXWidget, Node, Resource2ResourceConstraint, FunctionXGraph, Value, GraphXPublishedGraph
from arches.app.utils.betterJSONSerializer import JSONSerializer, JSONDeserializer
#from operator import itemgetter
from arches.app.models import models

#from arches.app.models.system_settings import settings
from arches.app.utils.data_management.resource_graphs.exporter import get_graphs_for_export as get_json_graph
#from arches.app.utils.data_management.resource_graphs.exporter import create_mapping_configuration_file as get_mapping
#from arches.app.utils.data_management.resource_graphs.exporter import export as export_gephi
#from collections import OrderedDict
from django.core.serializers.json import DjangoJSONEncoder


#_model_uuid_list = [{"model_name": "Museological Item", "uuid": "fa952d08-dd27-11ed-9655-00163e71351b"}]


ONTOLOGY_NAMESPACES = {#'http://my_namespace_here/': 'some_ns',
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "RDF",
                    "http://www.w3.org/2001/XMLSchema#": "xsd",
                    "http://www.w3.org/2000/01/rdf-schema#": "rdfs",
                    "http://www.cidoc-crm.org/cidoc-crm/": "crm",
                    "http://www.ics.forth.gr/isl/CRMarchaeo/": "CRMarchaeo",
                    "http://www.ics.forth.gr/isl/CRMdig/": "CRMdig",
                    "http://www.ics.forth.gr/isl/CRMgeo/": "CRMgeo",
                    "http://www.ics.forth.gr/isl/CRMinf/": "CRMinf",
                    "http://www.cidoc-crm.org/cidoc-crm/CRMsci/": "CRMsci",
                    "http://parthenos.d4science.org/CRMext/CRMpe.rdfs/": "CRMpe",
                    "https://takin.solutions/ontologies/CRMsurv/": "CRMsurv"
}

model_uuid_list = [{"model_name": "Museological Item tw", "uuid": "7886ae5e-009a-11ee-93d6-96a6d2455259"}]

In [10]:
def get_card_x_node_x_widget_data_for_export(resource_graph):
    cards_x_nodes_x_widgets = []
    nodeids = [node["nodeid"] for node in resource_graph["nodes"]]
    cards_x_nodes_x_widgets = CardXNodeXWidget.objects.filter(node_id__in=nodeids)
    return cards_x_nodes_x_widgets

def get_graph(graphid):    
    
    graph = get_json_graph([graphid])
    source_graph = pd.DataFrame(graph['graph'])
    
    nodes_df = pd.DataFrame(graph['graph'][0]['nodes'])
    edges_df = pd.DataFrame(graph['graph'][0]['edges'])
    cards_df = pd.DataFrame(graph['graph'][0]['cards'])
    nodegroups_df = pd.DataFrame(graph['graph'][0]['nodegroups'])
    cards_x_nodes_x_widgets_df = pd.DataFrame(graph['graph'][0]['cards_x_nodes_x_widgets'])
        
    for idx, row in nodes_df.iterrows():
        node_id = row['nodeid']
        nodegroup_id = row['nodegroup_id']
        
        # top node
        if row['istopnode']:
            nodes_df.loc[idx, 'parentproperty'] = 'RDF'
            nodes_df.loc[idx, 'ontologyproperty'] = 'RDF'
            nodes_df.loc[idx, 'card_sortorder'] = -1
            nodes_df.loc[idx, 'domainnode_id'] = 'root'
            nodes_df.loc[idx, 'rangenode_id'] = row['nodeid']
        
        # match edges
        matching_edge_row = edges_df[edges_df['rangenode_id'] == node_id]                
        if not matching_edge_row.empty:            
            nodes_df.loc[idx, 'rangenode_id'] = str(matching_edge_row['rangenode_id'].values[0])
            nodes_df.loc[idx, 'domainnode_id'] = str(matching_edge_row['domainnode_id'].values[0])
            nodes_df.loc[idx, 'ontologyproperty'] = matching_edge_row['ontologyproperty'].values[0]
        
        # match cards
        if not row['istopnode']: ## the top node has no nodegroup_id
            matching_card_row = cards_df[cards_df['nodegroup_id'] == nodegroup_id]        
            if not matching_card_row.empty:            
                name = matching_card_row['name'].values[0]
                nodes_df.loc[idx, 'card_label'] = str(name)
                nodes_df.loc[idx, 'card_sortorder'] = matching_card_row['sortorder'].values[0]
                nodes_df.loc[idx, 'visible'] = matching_card_row['visible'].values[0]
        
        # match cards_x_nodes_x_widgets
        matching_cards_x_nodes_x_widgets_row = cards_x_nodes_x_widgets_df[cards_x_nodes_x_widgets_df['node_id'] == node_id]        	
        if not matching_cards_x_nodes_x_widgets_row.empty:
            nodes_df.loc[idx, 'x_sortorder'] = matching_cards_x_nodes_x_widgets_row['sortorder'].values[0]
            nodes_df.loc[idx, 'x_card_label'] = str(matching_cards_x_nodes_x_widgets_row['label'].tolist()[0]['en'])
            nodes_df.loc[idx, 'x_card_id'] = matching_cards_x_nodes_x_widgets_row['card_id'].values[0]
            nodes_df.loc[idx, 'visible'] = matching_cards_x_nodes_x_widgets_row['visible'].values[0]
                
    return nodes_df.sort_values(by=['card_sortorder'])
    #return cards_x_nodes_x_widgets_df   

#graph = get_graph('7886ae5e-009a-11ee-93d6-96a6d2455259')
#graph.to_csv('sipc/out/temp.csv', index=False)
#graph

In [11]:
def make_uuid_paths(df):
    order = []
    path_dict = {}  
    tree = defaultdict(list)
    for _, row in df.iterrows():
        tree[row['domainnode_id']].append(row['rangenode_id'])
    root = set(df['domainnode_id']).difference(df['rangenode_id']).pop()

    # Make the tree
    def dfs(node, path):
        order.append(node)
        path_dict[node] = path
        for child in tree[node]:
            dfs(child, path + [child])
    dfs(root, [root])

    # Add order to the graph dataset
    id_to_order = {node_id: i for i, node_id in enumerate(order)}
    df['order'] = df['nodeid'].map(id_to_order)

    # Add paths to the dataset
    df['path'] = df['nodeid'].map(path_dict)
    #df['path_text'] = df['nodeid'].map(path_dict).apply(lambda x: ' -> '.join(x))

    df = df.sort_values('order').reset_index(drop=True)
    df.to_csv('sipc/out/sorted_path_graph.csv', index=False)
    return df

In [12]:
def add_namespace(path):

    for k,v in ONTOLOGY_NAMESPACES.items():
        if k in path:
            new_path = path.replace(k,f'{v}:')
            return new_path
    
def make_ns_paths(df):
    df = df.sort_values('order').reset_index(drop=True)
    path_list = []
    path_dict = {}
    n=0
    for idx, row in df.iterrows():
        full_path = row['path']        
        for path in full_path:
            if path == 'root':
                ns_path = 'RDF'
            else:
                matching_row = df[df['nodeid'] == path]
                if (n % 2) == 0:
                    #ns_path = matching_row['parentproperty'].values[0]
                    ns_path = add_namespace(matching_row['parentproperty'].values[0])
                    #print(ns_path)
                else:   
                    #ns_path = matching_row['ontologyclass'].values[0]
                    ns_path = add_namespace(matching_row['ontologyclass'].values[0])
                    #print(n, ns_path)
            path_list.append(ns_path)
            ns_path = ''
            n+=1        
        n=0        
        df.loc[idx, 'ns_path'] = str(path_list)
        path_list = []
    return df

In [13]:
def make_rec_defs(graph):
    graph = graph.sort_values('order').reset_index(drop=True)
    
    
    
    for idx, row in graph.iterrows():
        path_list = eval(row['ns_path'])
        path_length = len(path_list)
        tag = str(path_list[-1])
        
        if path_length == 2:
            root = etree.Element("root", tag="RDF") 
            node_map = etree.SubElement(root, "node-mapping", inputPath="/input")
            elem2 = etree.SubElement(root, "elem", tag = tag)
            attr = etree.SubElement(elem2, "attr", uriCheck="True", tag = "rdf:about")                           
        elif path_length == 3:
            elem3 = etree.SubElement(elem2, "elem", tag = tag, label=row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['visible']).lower())
            if row['datatype'] == 'semantic':
                elem3.attrib['unmappable']= 'true'        
            elif row['datatype'] == 'resource-instance':
                pass
                #elem3.attrib['uriCheck']= 'true'
                #elem3.attrib['attrs']= 'rdf:about'
                #elem3.attrib['simple']= 'true'
            #else:
            #    elem3.attrib['simple']= 'true'                        
        elif path_length == 4:
            elem4 = etree.SubElement(elem3, "elem", tag = tag, label = row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['visible']).lower())
            if row['datatype'] == 'semantic':
                elem4.attrib['unmappable']= 'true'        
            elif row['datatype'] == 'resource-instance':
                elem4.attrib['uriCheck']= 'true'
                elem4.attrib['attrs']= 'rdf:about'
                elem4.attrib['simple']= 'true'
            else:
                elem4.attrib['simple']= 'true'            
        elif path_length == 5:
            elem5 = etree.SubElement(elem4, "elem", tag = tag, label = row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['visible']).lower())
            if row['datatype'] == 'semantic':
                elem4.attrib['unmappable']= 'true'        
            elif row['datatype'] == 'resource-instance':
                pass
                #elem5.attrib['uriCheck']= 'true'
                #elem5.attrib['attrs']= 'rdf:about'
                #elem5.attrib['simple']= 'true'
            #else:
            #    elem5.attrib['simple']= 'true'            
        elif path_length == 6:
            elem6 = etree.SubElement(elem5, "elem", tag = tag, label = row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['visible']).lower())
            if row['datatype'] == 'semantic':
                elem4.attrib['unmappable']= 'true'        
            elif row['datatype'] == 'resource-instance':
                elem6.attrib['uriCheck']= 'true'
                elem6.attrib['attrs']= 'rdf:about'
                elem6.attrib['simple']= 'true'
            else:
                elem6.attrib['simple']= 'true'            
        elif path_length == 7:
            elem7 = etree.SubElement(elem6, "elem", tag = tag, label = row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['visible']).lower())
            if row['datatype'] == 'semantic':
                elem7.attrib['unmappable']= 'true'        
            #elif row['datatype'] == 'resource-instance':
            #    elem7.attrib['uriCheck']= 'true'
            #    elem7.attrib['attrs']= 'rdf:about'
            #    elem7.attrib['simple']= 'true'
            #else:
            #    elem7.attrib['simple']= 'true'            
        elif path_length == 8:
            elem8 = etree.SubElement(elem7, "elem", tag = tag, label = row['name'], datatype=row['datatype'], isrequired = str(row['isrequired']).lower(), isvisible = str(row['isvisible']).lower())
            if row['datatype'] == 'semantic':
                elem8.attrib['unmappable']= 'true'        
            elif row['datatype'] == 'resource-instance':
                elem8.attrib['uriCheck']= 'true'
                elem8.attrib['attrs']= 'rdf:about'
                elem8.attrib['simple']= 'true'
            else:
                elem8.attrib['simple']= 'true'            
        else:
            pass
        
        
    return etree.tostring(root, encoding='utf-8', method='xml', pretty_print=True).decode() 



#rec_def = make_rec_defs(ns_path_graph)
#print(rec_def)

In [14]:
#model_uuid_list = [{"model_name": "Museological Item tw", "uuid": "7886ae5e-009a-11ee-93d6-96a6d2455259"}]

graph = get_graph(model_uuid_list[0]['uuid'])
graph.to_csv('sipc/out/new_graph.csv', index=False)
path_graph = make_uuid_paths(graph)
ns_path_graph = make_ns_paths(path_graph)
ns_path_graph.to_csv('sipc/out/path_graph.csv', index=False)
rec_def = make_rec_defs(ns_path_graph)
print(rec_def)
#graph

<root tag="RDF">
  <node-mapping inputPath="/input"/>
  <elem tag="crm:E22_Human-Made_Object">
    <attr uriCheck="True" tag="rdf:about"/>
    <elem tag="crm:P1_is_identified_by" label="Identifier" datatype="semantic" isrequired="false" isvisible="true" unmappable="true">
      <elem tag="crm:E55_Type" label="Identifier Type" datatype="resource-instance" isrequired="false" isvisible="true" uriCheck="true" attrs="rdf:about" simple="true"/>
      <elem tag="rdfs:Literal" label="Identifier Content" datatype="string" isrequired="false" isvisible="true" simple="true"/>
    </elem>
    <elem tag="crm:P128_carries" label="Subject" datatype="semantic" isrequired="false" isvisible="true" unmappable="true">
      <elem tag="crm:E4_Period" label="Subject_period" datatype="resource-instance" isrequired="false" isvisible="true" uriCheck="true" attrs="rdf:about" simple="true" unmappable="true">
        <elem tag="crm:P3_has_note" label="Subject_period Note" datatype="string" isrequired="false" isvis