In [25]:
import yaml
from linkml.generators import shaclgen, owlgen
from linkml.validator import validate

from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import requests

from neo4j import GraphDatabase
import hashlib

In [26]:
def generate_said(data):
    data_string = str(data).encode()
    hash_object = hashlib.sha256()
    hash_object.update(data_string)
    return hash_object.hexdigest()

In [8]:
with open('/Users/maximilianstaebler/code/DLR/GX4FM-Base-X/SemanticDataLink/linkml/yaml/verkerhszaehlung_hamburg.yaml', 'r') as file:
    input_yaml = yaml.safe_load(file)

shaclGraph = shaclgen.ShaclGenerator(str(input_yaml)).as_graph()
owlGraph = owlgen.OwlSchemaGenerator(str(input_yaml)).as_graph()

# print(shaclGraph.serialize())
# print(owlGraph.serialize())



# metamodel_version: 1.7.0


# Neo4j

```cypher
    query = f"""
        MERGE (n:`{label}` {{{properties_string}}})
        RETURN n
    """
```

Label will be used in neo4j to reference the different unique nodes. Therefore `label` should be used with a unique **uri**!

In [22]:
from neo4j import GraphDatabase

class Neo4jConnection:

    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__password = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__password))
        except Exception as e:
            print("Failed to create the driver:", e)
    
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
    
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

conn = Neo4jConnection(uri="neo4j://localhost:7687", user="neo4j", pwd="mast#mast1")
print(conn)

<__main__.Neo4jConnection object at 0x161729550>


In [67]:
def add_node_if_not_exists(conn, label, said, node_type = 'dataset', additional_properties=None):
    
    if node_type not in ['dataset', 'service', 'infrastructure']:
        return 'TypeError'
    
    # Using string formatting to insert the label dynamically
    # MERGE is used only with label and node_id
    merge_query = f"""
    MERGE (n:`{node_type}` {{label: $label, said: $said}})
    """

    # Building a query to set additional properties
    # parameters = {'name': name, 'id': said, 'node_type': node_type}
    set_properties_query = ""
    if additional_properties:
        set_properties_query = " SET " + ", ".join([f"n.{key} = ${key}" for key in additional_properties.keys()])
        # parameters.update(additional_properties)

    # Combining the queries
    query = merge_query + set_properties_query + " RETURN n"

    # Preparing parameters, including additional properties
    parameters = {'label': label, 'said': said}
    if additional_properties:
        parameters.update(additional_properties)

    result = conn.query(query, parameters)
    return result

def create_edges(conn, property_1, node_type_1, attr_1, property_2, node_type_2, attr_2, relationship_type):
    query = f"""
    MATCH (a:`{node_type_1}`), (b:`{node_type_2}`)
    WHERE a.`{property_1}` = '{attr_1}' AND b.`{property_2}` = '{attr_2}'
    MERGE (a)-[r:`{relationship_type}`]->(b)
    RETURN a, b, r
    """
    return conn.query(query)

In [41]:
print(f"id: {input_yaml['id']}")
print(f"name: {input_yaml['name']}")
print(f"prefixes: {input_yaml['prefixes']}")
print(f"Attributes: {input_yaml['classes']['Verkehrszählstellen_Hamburg']['attributes'].keys()}")
print(f"Attribute Example: {input_yaml['classes']['Verkehrszählstellen_Hamburg']['attributes']['location']}")
print(f"Overlay Example: {input_yaml['classes']['Verkehrszählstellen_Hamburg']['attributes']['overlay_standard']}")
print(f"enums: {input_yaml['enums']}")

id: https://base-x-ecosystem.com/dlr-ki/
name: Verkehrszählstellen_Hamburg
prefixes: {'linkml': 'https://w3id.org/linkml/', 'sdl': 'https://raw.githubusercontent.com/GX4FM-Base-X/SemanticDataLink/main/oca/overlays/sdlOntology.ttl#', 'wlo': 'http://purl.org/ontology/wo/name#', 'mv': 'http://schema.mobivoc.org/id#', 'swpo': 'http://sw-portal.deri.org/ontologies/swportal#', 'geom': 'http://data.ign.fr/def/geometrie#', 'dlr-ki': 'https://base-x-ecosystem.com/dlr-ki/'}
Attributes: dict_keys(['bezeichnung', 'name', 'id', 'last_count', 'geom', 'position', 'location', 'CRS', 'overlay_domain', 'overlay_usageFrequency', 'overlay_information', 'overlay_standard', 'overlay_label'])
Attribute Example: {'title': 'location', 'multivalued': False, 'identifier': False, 'required': True, 'range': 'string', 'slot_uri': 'swpo:hasLocation'}
Overlay Example: {'title': 'standard', 'slot_uri': 'sdl:standard', 'range': 'string', 'multivalued': False, 'required': True, 'pattern': ''}
enums: {'CRS_ENUM': {'permi

In [42]:
dataset_service_said = generate_said(input_yaml)
print(dataset_service_said)

40570d66060046e2660fe1390543b52619d1eba4e51d5967dd87a6ff33bd7c2d


In [68]:
# Types: 'dataset', 'service', 'infrastructure'
response = add_node_if_not_exists(conn, label=input_yaml['id'], said=dataset_service_said, node_type='dataset', additional_properties={'name':input_yaml['name']})
print(response)
response = add_node_if_not_exists(conn, label=input_yaml['id'], said=dataset_service_said, node_type='service', additional_properties={'name':input_yaml['name']})
print(response)
response = add_node_if_not_exists(conn, label=input_yaml['id'], said=dataset_service_said, node_type='infrastructure', additional_properties={'name':input_yaml['name']})
print(response)

[<Record n=<Node element_id='4:71d9652d-6ac3-4744-9806-13e6489abd58:0' labels=frozenset({'dataset'}) properties={'name': 'Verkehrszählstellen_Hamburg', 'label': 'https://base-x-ecosystem.com/dlr-ki/', 'said': '40570d66060046e2660fe1390543b52619d1eba4e51d5967dd87a6ff33bd7c2d'}>>]
[<Record n=<Node element_id='4:71d9652d-6ac3-4744-9806-13e6489abd58:1' labels=frozenset({'service'}) properties={'name': 'Verkehrszählstellen_Hamburg', 'label': 'https://base-x-ecosystem.com/dlr-ki/', 'said': '40570d66060046e2660fe1390543b52619d1eba4e51d5967dd87a6ff33bd7c2d'}>>]
[<Record n=<Node element_id='4:71d9652d-6ac3-4744-9806-13e6489abd58:2' labels=frozenset({'infrastructure'}) properties={'name': 'Verkehrszählstellen_Hamburg', 'label': 'https://base-x-ecosystem.com/dlr-ki/', 'said': '40570d66060046e2660fe1390543b52619d1eba4e51d5967dd87a6ff33bd7c2d'}>>]


In [38]:
dict(response[0])

{'n': <Node element_id='4:71d9652d-6ac3-4744-9806-13e6489abd58:0' labels=frozenset({'https://base-x-ecosystem.com/dlr-ki/'}) properties={'id': '40570d66060046e2660fe1390543b52619d1eba4e51d5967dd87a6ff33bd7c2d'}>}

In [35]:
response.data()

AttributeError: 'list' object has no attribute 'data'

In [3]:
from neo4j import GraphDatabase

class GraphManager:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def create_dataset_or_service(self, properties, overlays):
        with self.driver.session() as session:
            session.write_transaction(self._create_nodes_and_edges, properties, overlays)

    @staticmethod
    def _create_nodes_and_edges(tx, properties, overlays):
        # Create the main dataset/service node
        create_node_query = """
        CREATE (ds:DatasetService {props})
        RETURN id(ds) AS nodeId
        """
        main_node_id = tx.run(create_node_query, props=properties).single()["nodeId"]

        for overlay in overlays:
            # Create overlay node
            overlay_node_query = """
            CREATE (o:Overlay {props}) 
            RETURN id(o) AS overlayId
            """
            overlay_props = {k: v for k, v in overlay.items() if k != "range" and k != "description"}
            overlay_id = tx.run(overlay_node_query, props=overlay_props).single()["overlayId"]

            # Create edges
            if "range" in overlay:
                tx.run("MATCH (ds:DatasetService), (o:Overlay) WHERE id(ds) = $mainId AND id(o) = $overlayId "
                       "CREATE (o)-[:RDFS_DATATYPE {range: $range}]->(ds)", mainId=main_node_id, overlayId=overlay_id, range=overlay["range"])

            if "description" in overlay:
                tx.run("MATCH (ds:DatasetService), (o:Overlay) WHERE id(ds) = $mainId AND id(o) = $overlayId "
                       "CREATE (o)-[:RDFS_COMMENT {description: $description}]->(ds)", mainId=main_node_id, overlayId=overlay_id, description=overlay["description"])


In [5]:
uri = "bolt://localhost:7687"
user = "neo4j"
password = "mast#mast1"

graph_manager = GraphManager(uri, user, password)

In [6]:
properties = {
    "Provider": "Provider Name",
    "DatasetServiceName": "Dataset Name",
    # Include other mandatory and optional properties here
}

overlays = [
    {
        "range": "Range Value",
        "description": "Description Value",
        # Include other optional overlay properties here
    },
    # More overlays can be added here
]

graph_manager.create_dataset_or_service(properties, overlays)
graph_manager.close()

  session.write_transaction(self._create_nodes_and_edges, properties, overlays)


CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input '{': expected a parameter (line 2, column 27 (offset: 35))
"        CREATE (ds:DatasetService {props})"
                                   ^}