In [1]:
from django_for_jupyter import init_django
init_django("arches")

import json
import pprint 
import uuid
import pandas as pd
from lxml import etree
from collections import defaultdict
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt
from urllib.parse import urlparse

from arches.app.models.graph import Graph
from arches.app.models.concept import Concept
from arches.app.models.models import CardXNodeXWidget, Node, Resource2ResourceConstraint, FunctionXGraph, Value, GraphXPublishedGraph
from arches.app.utils.betterJSONSerializer import JSONSerializer, JSONDeserializer
#from operator import itemgetter
from arches.app.models import models

#from arches.app.models.system_settings import settings
from arches.app.utils.data_management.resource_graphs.exporter import get_graphs_for_export as get_json_graph
from arches.app.utils.data_management.resource_graphs.exporter import create_mapping_configuration_file as get_mapping
#from arches.app.utils.data_management.resource_graphs.exporter import export as export_gephi
#from collections import OrderedDict
from django.core.serializers.json import DjangoJSONEncoder



# List-of-dicts form of the model selection.
# NOTE(review): nothing in the visible cells reads `_model_uuid_list`; the
# dict `model_uuid_list` defined further down is what the later cells index.
_model_uuid_list = [{"model_name": "7886ae5e-009a-11ee-93d6-96a6d2455259", "uuid": "7886ae5e-009a-11ee-93d6-96a6d2455259"}, 
                  ]

# Maps an ontology namespace URI to the short prefix that replaces it when
# class/property URIs are abbreviated (see add_namespace).
# Keep a more specific URI (e.g. ".../cidoc-crm/CRMsci/") ahead of the shorter
# URI it extends (".../cidoc-crm/") so that a first-match lookup resolves to
# the specific prefix.
ONTOLOGY_NAMESPACES = {#'http://my_namespace_here/': 'some_ns',
                    "http://purl.org/dc/elements/1.1/": "dc",
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "RDF",
                    "http://www.w3.org/2001/XMLSchema#": "xsd",
                    "http://www.w3.org/2000/01/rdf-schema#": "rdfs",
                    "http://www.cidoc-crm.org/cidoc-crm/CRMsci/": "CRMsci",
                    "http://www.cidoc-crm.org/cidoc-crm/": "crm",
                    "http://www.ics.forth.gr/isl/CRMarchaeo/": "CRMarchaeo",
                    "http://www.ics.forth.gr/isl/CRMdig/": "CRMdig",
                    "http://www.ics.forth.gr/isl/CRMgeo/": "CRMgeo",
                    "http://www.ics.forth.gr/isl/CRMinf/": "CRMinf",
                    "http://parthenos.d4science.org/CRMext/CRMpe.rdfs/": "CRMpe",
                    "https://takin.solutions/ontologies/CRMsurv/": "CRMsurv"
}

# The model processed by the cells below; they read model_uuid_list['uuid'].
# NOTE(review): "model_name" currently holds the same UUID string as "uuid".
model_uuid_list = {"model_name": "7886ae5e-009a-11ee-93d6-96a6d2455259", "uuid": "7886ae5e-009a-11ee-93d6-96a6d2455259"}

In [2]:
def get_card_x_node_x_widget_data_for_export(resource_graph):
    """Look up the card/node/widget links for every node of a graph.

    Args:
        resource_graph: mapping with a "nodes" entry; each node is a mapping
            that carries a "nodeid".

    Returns:
        The result of ``CardXNodeXWidget.objects.filter(node_id__in=...)``
        restricted to the node ids found in ``resource_graph``.
    """
    node_ids = []
    for node in resource_graph["nodes"]:
        node_ids.append(node["nodeid"])
    return CardXNodeXWidget.objects.filter(node_id__in=node_ids)

def get_graph(graphid):    
    """Flatten one exported graph into a single DataFrame of classes and properties.

    The graph is fetched with ``get_json_graph([graphid])`` and only the first
    entry of ``graph['graph']`` is used. Every node becomes a row with
    ``elem_type == 'class'``; for every node that is the range of an edge, an
    extra row with ``elem_type == 'property'`` is appended for that edge.

    Linking columns written here:
        * top node:  parent_id = 'root',                 child_id = its nodeid
        * class row: parent_id = id of its incoming edge, child_id = its nodeid
        * property:  parent_id = the edge's domainnode_id, child_id = the edge id
    so that following parent_id -> child_id alternates class / property rows.

    Args:
        graphid: id of the graph to export, passed to ``get_json_graph``.

    Returns:
        The node DataFrame, extended with the columns above plus card details,
        sorted by ``card_sortorder``.

    NOTE(review): ``source_graph``, ``nodegroups_df`` and
    ``cards_x_nodes_x_widgets_df`` are built but never read afterwards.
    NOTE(review): ``visible`` ends up holding a mix of bool ``True`` (top node,
    property rows) and ``str(...)`` values (card-backed rows).
    NOTE(review): appended property rows are all created with index label 0, so
    the returned frame has duplicate index labels.
    """
    
    graph = get_json_graph([graphid])
    source_graph = pd.DataFrame(graph['graph'])
    
    # One frame per section of the first exported graph.
    nodes_df = pd.DataFrame(graph['graph'][0]['nodes'])
    edges_df = pd.DataFrame(graph['graph'][0]['edges'])
    cards_df = pd.DataFrame(graph['graph'][0]['cards'])
    nodegroups_df = pd.DataFrame(graph['graph'][0]['nodegroups'])
    cards_x_nodes_x_widgets_df = pd.DataFrame(graph['graph'][0]['cards_x_nodes_x_widgets'])
        
    # The iterator is created from the frame as loaded; `nodes_df` is re-bound
    # to a larger frame inside the loop (see pd.concat below), and the .loc
    # writes always target whatever `nodes_df` currently names.
    for idx, row in nodes_df.iterrows():
        node_id = row['nodeid']
        nodegroup_id = row['nodegroup_id']
        nodes_df.loc[idx, 'id'] = node_id
        nodes_df.loc[idx, 'elem_name'] = row['ontologyclass']
        nodes_df.loc[idx, 'elem_type'] = 'class'
        
        # top node
        if row['istopnode']:
            nodes_df.loc[idx, 'parentproperty'] = 'RDF'
            nodes_df.loc[idx, 'ontologyproperty'] = 'RDF'
            nodes_df.loc[idx, 'card_sortorder'] = -1
            nodes_df.loc[idx, 'parent_id'] = 'root'
            nodes_df.loc[idx, 'child_id'] = row['nodeid']
            nodes_df.loc[idx, 'visible'] = True
        
        # match edges to nodes
        # (edges whose range is this node; only the first match is used)
        matching_edge_row = edges_df[edges_df['rangenode_id'] == node_id]                
        if not matching_edge_row.empty:            
            nodes_df.loc[idx, 'edgeid'] = str(matching_edge_row['edgeid'].values[0])
            # Same filter as matching_edge_row, so this is non-empty here too.
            add_parent_id = edges_df[edges_df['rangenode_id'] == node_id]                
            if not add_parent_id.empty:
                # The class row hangs below its incoming edge (the property row).
                nodes_df.loc[idx, 'parent_id'] = str(add_parent_id['edgeid'].values[0])
                #nodes_df.loc[idx, 'child_id'] = str(matching_edge_row['domainnode_id'].values[0])
            nodes_df.loc[idx, 'child_id'] = node_id
            
        # match cards
        if not row['istopnode']: ## the top node has no nodegroup_id
            matching_card_row = cards_df[cards_df['nodegroup_id'] == nodegroup_id]        
            if not matching_card_row.empty:            
                name = matching_card_row['name'].values[0]
                nodes_df.loc[idx, 'card_label'] = str(name)
                nodes_df.loc[idx, 'card_sortorder'] = matching_card_row['sortorder'].values[0]
                nodes_df.loc[idx, 'instructions'] = str(matching_card_row['instructions'].values[0])
                nodes_df.loc[idx, 'helptitle'] = str(matching_card_row['helptitle'].values[0])
                nodes_df.loc[idx, 'helptext'] = str(matching_card_row['helptext'].values[0])
                # Stored as text here, unlike the boolean True used elsewhere.
                nodes_df.loc[idx, 'visible'] = str(matching_card_row['visible'].values[0])

                
                
        # match cards_x_nodes_x_widgets
        #matching_cards_x_nodes_x_widgets_row = cards_x_nodes_x_widgets_df[cards_x_nodes_x_widgets_df['node_id'] == node_id]
        #print(matching_cards_x_nodes_x_widgets_row)
        #if not matching_cards_x_nodes_x_widgets_row.empty:
        #    nodes_df.loc[idx, 'x_sortorder'] = matching_cards_x_nodes_x_widgets_row['sortorder'].values[0]
        #    nodes_df.loc[idx, 'x_card_label'] = str(matching_cards_x_nodes_x_widgets_row['label'].tolist()[0]['en'])
        #    nodes_df.loc[idx, 'x_card_id'] = matching_cards_x_nodes_x_widgets_row['card_id'].values[0]
        #    nodes_df.loc[idx, 'visible'] = matching_cards_x_nodes_x_widgets_row['visible'].values[0]
        
        # add edges to nodes
        # add_edge_row uses the same filter as matching_edge_row, so guarding on
        # matching_edge_row below is equivalent to guarding on add_edge_row.
        add_edge_row = edges_df[edges_df['rangenode_id'] == node_id]                
        if not matching_edge_row.empty:   
            # Synthetic row representing the incoming edge as a 'property' element.
            new_row_df = pd.DataFrame({
                'elem_type': 'property',
                'elem_name': str(matching_edge_row['ontologyproperty'].values[0]),
                'is_collector': False,
                'nodeid': '',
                'name': '',
                'description': '', 
                'istopnode': False,
                'ontologyclass': '',
                'datatype': 'property',
                'nodegroup_id': '',
                'graph_id': row['graph_id'],
                'config': '',
                'issearchable': '',
                'isrequired': True,
                'sortorder': '',
                'fieldname': '',
                'exportable': '',
                'alias': '',          
                'hascustomalias': '',
                'parentproperty': '',
                'id': str(add_edge_row['edgeid'].values[0]),
                'edgeid': str(add_edge_row['edgeid'].values[0]), 
                'parent_id': str(add_edge_row['domainnode_id'].values[0]),
                'child_id': str(add_edge_row['edgeid'].values[0]), #str(add_edge_row['rangenode_id'].values[0]),
                'card_label': '',
                'card_sortorder': 0,
                'visible': True,
                'ontologyproperty': ''
            }, index=[0])

            #nodes_df.loc[len(nodes_df.index)] = new_row_df
            # Re-binds nodes_df to a new, longer frame on every appended property.
            nodes_df = pd.concat([nodes_df, new_row_df])   
            #print(len(new_row))    

    
    return nodes_df.sort_values(by=['card_sortorder'])
    #return new_row_df
#graph = get_graph('af04eac2-a131-11ed-a102-9cf387da2c40')
#graph.to_csv('sipc/out/temp.csv', index=False)
#graph

In [3]:
from collections import defaultdict
from typing import List, Dict


# Function to generate paths
def generate_paths(tree: Dict, node: str, path: List = None):
    """Enumerate every path from ``node`` down through ``tree``, depth first.

    Args:
        tree: mapping of a parent key to the list of its child keys.
        node: key to start from.
        path: keys already walked to reach ``node``; omit it for a fresh walk.
            The list passed in is never modified.

    Returns:
        A list of paths (each a list of keys) in pre-order: the path ending at
        ``node`` first, then the paths of each child subtree in child order.
        Every returned path starts with ``path`` followed by ``node``.
    """
    # A None default avoids sharing one list object between calls.
    path = (path or []) + [node]

    paths = [path]
    for child in tree.get(node, ()):
        # A child that is already on the current path would recurse forever
        # on cyclic parent/child data; stop the walk at that point instead.
        if child in path:
            continue
        paths.extend(generate_paths(tree, child, path))
    return paths

def make_uuid_paths(df, out_path='sipc/out/merged.csv'):
    """Attach the root-to-element id path to every row of ``df``.

    A parent -> children mapping is built from the ``parent_id`` and
    ``child_id`` columns, every path starting at ``'root'`` is enumerated with
    ``generate_paths``, and the result is left-joined onto ``df`` by ``id``.

    Args:
        df: frame with ``id``, ``parent_id`` and ``child_id`` columns.
        out_path: CSV file the merged frame is written to. Pass ``None`` to
            skip writing. The default keeps the historical output location.

    Returns:
        ``df`` with two extra columns: ``order`` (position of the path in the
        depth-first enumeration) and ``path`` (list of ids from ``'root'`` to
        the row's ``id``). Rows whose ``id`` ends no path get missing values.
    """
    tree = df.groupby('parent_id')['child_id'].apply(list).to_dict()
    paths = generate_paths(tree, 'root')

    # One record per path, keyed by the id the path ends on.
    paths_df = pd.DataFrame(
        [(path[-1], i, path) for i, path in enumerate(paths)],
        columns=['id', 'order', 'path'],
    )

    # Merge onto the frame that was passed in rather than a notebook-level
    # global, so the function works on a fresh kernel and for any input frame.
    merged_df = pd.merge(df, paths_df, on='id', how='left')
    if out_path is not None:
        merged_df.to_csv(out_path, index=False)
    return merged_df


In [4]:
def add_namespace(path, namespaces=None):
    """Abbreviate a namespace URI inside ``path`` to its ``prefix:`` form.

    Args:
        path: text that may contain one of the known namespace URIs,
            typically a full class or property URI.
        namespaces: mapping of namespace URI -> prefix. Defaults to the
            notebook-level ``ONTOLOGY_NAMESPACES``.

    Returns:
        ``path`` with the matching namespace URI replaced by ``"<prefix>:"``.
        When no namespace matches, or ``path`` is not a string, ``path`` is
        returned unchanged instead of ``None``.
    """
    if namespaces is None:
        namespaces = ONTOLOGY_NAMESPACES

    # Missing values (None / NaN) cannot be searched; hand them back as-is.
    if not isinstance(path, str):
        return path

    # Try longer URIs first so that a namespace which extends another one
    # (".../cidoc-crm/CRMsci/" vs ".../cidoc-crm/") wins regardless of the
    # order in which the mapping was written. sorted() is stable, so URIs of
    # equal length keep their mapping order.
    for uri in sorted(namespaces, key=len, reverse=True):
        if uri in path:
            return path.replace(uri, f'{namespaces[uri]}:')

    return path
    
def make_ns_paths(df):
    """Add an ``ns_path`` column holding each row's path in prefixed form.

    The frame is sorted by ``order`` and re-indexed. For every row, each id in
    its ``path`` list is translated: ``'root'`` becomes ``'RDF'``; any other id
    is looked up in the ``id`` column and the ``elem_name`` of the first
    matching row is passed through ``add_namespace``. The translated list is
    stored as its ``str()`` representation.

    Args:
        df: frame with ``order``, ``path``, ``id`` and ``elem_name`` columns.

    Returns:
        The sorted, re-indexed frame with the ``ns_path`` column filled in.
    """
    ordered = df.sort_values('order').reset_index(drop=True)

    for row_label, record in ordered.iterrows():
        labels = []
        for element_id in record['path']:
            if element_id == 'root':
                labels.append('RDF')
                continue
            hits = ordered[ordered['id'] == element_id]
            labels.append(add_namespace(hits['elem_name'].values[0]))
        ordered.loc[row_label, 'ns_path'] = str(labels)

    return ordered

In [5]:
def make_opts(graph, concepts):
    """Build, print and return the ``<opts>`` XML for concept-backed elements.

    Args:
        graph: passed unchanged to ``make_doc_paths``; each item it yields is
            read for its ``'name'`` and ``'path'`` entries.
        concepts: JSON text mapping a name to a mapping whose values are the
            option labels for that name.

    Returns:
        The pretty-printed XML document as a ``str``. The same text is also
        printed. One ``<opt-list>`` is emitted per item whose ``'name'`` equals
        a key of ``concepts``, with one ``<opt>`` child per option label.
    """
    ns_list = make_doc_paths(graph)
    # Parse the JSON once rather than once per item.
    concept_map = json.loads(concepts)

    root = etree.Element("opts")
    for item in ns_list:
        for concept_name, concept_values in concept_map.items():
            if item['name'] != concept_name:
                continue

            opt_path = '/'.join(item['path'])
            opt_list = etree.SubElement(
                root,
                "opt-list",
                dictionary=item['name'],
                path=opt_path,
                displayName=item['name'],
            )
            for label in concept_values.values():
                etree.SubElement(opt_list, "opt", value=label)

    _opts = etree.tostring(root, encoding='utf-8', method='xml', pretty_print=True).decode()
    print(_opts)
    # Hand the document back so callers that assign the result get the XML
    # rather than None.
    return _opts


def get_concepts_as_json(_uuid):
    """Fetch the mapping file content for a graph.

    Calls ``get_mapping(_uuid)``, takes the ``'outputfile'`` entry of the first
    item it returns, and reads its content with ``getvalue()``.

    Returns:
        A one-element list holding that content.
    """
    mapping_files = get_mapping(_uuid)
    first_output = mapping_files[0]['outputfile']
    return [first_output.getvalue()]

# Fetch the concept mapping for the selected model; element 0 is the content
# returned by get_concepts_as_json.
_concepts = get_concepts_as_json(model_uuid_list['uuid'])

print(_concepts[0])

# NOTE(review): `graph` is only assigned in a later cell
# (graph = get_graph(...)), so on a fresh kernel this call raises NameError,
# as the recorded output below shows. Run that cell first, or move this call
# after it.
all_opts = make_opts(graph, _concepts[0])
print(all_opts)

{
    "Resource to Resource Relationship Types": {
        "ac41d9be-79db-4256-b368-2f4559cfbe55": "is related to"
    }
}


NameError: name 'graph' is not defined

In [20]:
#model_uuid_list = [{"model_name": "Museological Item tw", "uuid": "7886ae5e-009a-11ee-93d6-96a6d2455259"}]

# Pipeline: flatten the graph -> attach id paths -> attach prefixed paths.
# Each intermediate frame is also written as CSV under sipc/out/.
# NOTE(review): the paths are relative to the kernel's working directory and
# the sipc/out/ folder presumably has to exist already - confirm.
graph = get_graph(model_uuid_list['uuid'])
graph.to_csv('sipc/out/new_graph.csv', index=False)
path_graph = make_uuid_paths(graph)
ns_path_graph = make_ns_paths(path_graph)
ns_path_graph.to_csv('sipc/out/ns_path_graph.csv', index=False)
# Disabled: make_opts takes (graph, concepts), so this one-argument call fails.
#opts = make_opts(ns_path_graph)
#print(rec_def)
#graph




TypeError: make_opts() missing 1 required positional argument: 'concepts'