In [1]:
%status

{'status': 'healthy',
 'startTime': 'Thu Jun 30 11:53:17 UTC 2022',
 'dbEngineVersion': '1.1.1.0.R3',
 'role': 'writer',
 'dfeQueryEngine': 'viaQueryHint',
 'gremlin': {'version': 'tinkerpop-3.5.2'},
 'sparql': {'version': 'sparql-1.1'},
 'opencypher': {'version': 'Neptune-9.0.20190305-1.0'},
 'labMode': {'ObjectIndex': 'disabled',
  'ReadWriteConflictDetection': 'enabled'},
 'features': {'ResultCache': {'status': 'disabled'},
  'IAMAuthentication': 'disabled',
  'Streams': 'disabled',
  'AuditLog': 'disabled'},
 'settings': {'clusterQueryTimeoutInMs': '120000'}}

In [2]:
from SPARQLWrapper import SPARQLWrapper, POST
from PlanQK_extraction_v4 import *
from QAZ_extraction_v5 import *
import ast

### Data ingestion

In [3]:
url_qaz = "https://raw.githubusercontent.com/MIBbrandon/stephenjordan.github.io/master/src/storage/"
url_planqk = "https://raw.githubusercontent.com/PlanQK/semantic-services/master/extracted%20knowledge/"

In [4]:
data_qaz = [triples_algorithms(url_qaz), triples_publications(url_qaz)]
data_planqk = [triples_applicationareas(url_planqk), triples_software(url_planqk), triples_problems(url_planqk)]

In [5]:
def check_input(o):
    prefixes = ["cs:", "dc:","mls:","muo:","org:","owl:","rdf:","xml:","xsd:","rdfs:","terms:","parameter:","Publication:",
                "quantumshare:","Implementation:","ProblemExecution:","QuantumAlgorithm:","cpannotationschema:"]
    if type(o) == int:
        return o
    elif any(i in o for i in prefixes): 
        return o
    else:
        o = f'"{o}"'
        return o

In [6]:
def ingestion(triplet):
    triple = triplet
    triple['s'] = check_input(triple['s'])
    triple['o'] = check_input(triple['o']) #prevents bad queries for labels, strings, and integers
    print(triple)
    sparql = SPARQLWrapper("https://database-1-jm.cluster-csbkotxlmqjb.eu-west-1.neptune.amazonaws.com:8182/sparql")
    sparql.setMethod(POST)

    sparql.setQuery("""PREFIX cs: <http://mklab.iti.gr/pericles/ComputerSystem_ODP#> 
                PREFIX dc: <http://purl.org/dc/elements/1.1/> 
                PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#> 
                PREFIX mls: <http://www.w3.org/ns/mls/cp#> 
                PREFIX muo: <http://elite.polito.it/ontologies/muo-vocab.owl#> 
                PREFIX org: <http://www.w3.org/ns/org#> 
                PREFIX owl: <http://www.w3.org/2002/07/owl#> 
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
                PREFIX xml: <http://www.w3.org/XML/1998/namespace> 
                PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> 
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
                PREFIX terms: <http://purl.org/dc/terms/> 
                PREFIX parameter: <http://www.semanticweb.org/20173656/ontologies/2022/5/Parameter#> 
                PREFIX Publication: <http://www.semanticweb.org/20173656/ontologies/2022/5/Publication#> 
                PREFIX quantumshare: <http://www.semanticweb.org/20173656/ontologies/2022/5/QuantumShare#> 
                PREFIX Implementation: <http://www.semanticweb.org/20173656/ontologies/2022/5/QuantumImplementation#> 
                PREFIX ProblemExecution: <http://www.semanticweb.org/20173656/ontologies/2022/5/ProblemExecution#> 
                PREFIX QuantumAlgorithm: <http://www.semanticweb.org/20173656/ontologies/2022/5/QuantumAlgorithm#> 
                PREFIX cpannotationschema: <http://www.ontologydesignpatterns.org/schemas/cpannotationschema.owl#> 
                INSERT DATA {{{s} {p} {o}}}""".format(s=triple['s'], p=triple['p'], o=triple['o'])
    )

    results = sparql.query()
    print(results.response.read())

#### Github Repositories

In [8]:
for function in data_planqk:
    for i in function:
        ingestion(i) 

{'s': 'quantumshare:Engineering_Science', 'p': 'rdfs:subClassOf', 'o': 'ProblemExecution:Application_Area'}
b'[\n{\n    "type" : "UpdateEvent",\n    "totalElapsedMillis" : 0,\n    "elapsedMillis" : 0,\n    "connFlush" : 0,\n    "batchResolve" : 0,\n    "whereClause" : 0,\n    "deleteClause" : 0,\n    "insertClause" : 0\n},\n{\n    "type" : "Commit",\n    "totalElapsedMillis" : 3\n}\n]'
{'s': 'quantumshare:Engineering_Science', 'p': 'rdfs:comment', 'o': '"Subclasses of the concept Application Area are reused from FAIRsharing Subject Ontology (SRAO, licensed under CC BY 4.0)."'}
b'[\n{\n    "type" : "UpdateEvent",\n    "totalElapsedMillis" : 0,\n    "elapsedMillis" : 0,\n    "connFlush" : 0,\n    "batchResolve" : 0,\n    "whereClause" : 0,\n    "deleteClause" : 0,\n    "insertClause" : 0\n},\n{\n    "type" : "Commit",\n    "totalElapsedMillis" : 9\n}\n]'
{'s': 'quantumshare:Civil_Engineering', 'p': 'rdfs:subClassOf', 'o': 'quantumshare:Engineering_Science'}
b'[\n{\n    "type" : "UpdateEv

#### Text files about publications

In [9]:
def txt_to_triples(path):
    with open(path) as f:
        triples = f.read().splitlines()
        triples = [ast.literal_eval(i) for i in triples]
    return triples

In [10]:
data_application = txt_to_triples('ExxonMobil_triples.txt')
data_implementation = txt_to_triples('Qiskit_triples.txt')

In [12]:
for i in data_implementation:
    ingestion(i)

{'s': 'quantumshare:Qiskit', 'p': 'rdfs:subClassOf', 'o': '"<http://www.semanticweb.org/20173656/ontologies/2022/4/QuantumComputingV2#Quantum_Implementation>"'}
b'[\n{\n    "type" : "UpdateEvent",\n    "totalElapsedMillis" : 0,\n    "elapsedMillis" : 0,\n    "connFlush" : 0,\n    "batchResolve" : 0,\n    "whereClause" : 0,\n    "deleteClause" : 0,\n    "insertClause" : 0\n},\n{\n    "type" : "Commit",\n    "totalElapsedMillis" : 7\n}\n]'
{'s': 'quantumshare:IBM', 'p': 'rdfs:subClassOf', 'o': 'Implementation:Implementer'}
b'[\n{\n    "type" : "UpdateEvent",\n    "totalElapsedMillis" : 0,\n    "elapsedMillis" : 0,\n    "connFlush" : 0,\n    "batchResolve" : 0,\n    "whereClause" : 0,\n    "deleteClause" : 0,\n    "insertClause" : 0\n},\n{\n    "type" : "Commit",\n    "totalElapsedMillis" : 3\n}\n]'
{'s': 'quantumshare:Qiskit', 'p': 'Implementation:involvesOrganization', 'o': 'quantumshare:IBM'}
b'[\n{\n    "type" : "UpdateEvent",\n    "totalElapsedMillis" : 0,\n    "elapsedMillis" : 0,\n