In [19]:
from langchain_community.graphs import OntotextGraphDBGraph
from langchain.chains.graph_qa.ontotext_graphdb import OntotextGraphDBQAChain
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from IPython.display import display, Markdown

import os

In [3]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
    api_key=OPENAI_API_KEY
)


In [4]:
os.environ["GRAPHDB_USERNAME"] = os.environ.get("GRAPHDB_USERNAME")
os.environ["GRAPHDB_PASSWORD"] = os.environ.get("GRAPHDB_PASSWORD")

In [None]:
graph = OntotextGraphDBGraph(
    query_endpoint="http://localhost:7200/repositories/cykg-rag-1",
    query_ontology="""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    CONSTRUCT {
        ?entity a ?type .
        ?subClass rdfs:subClassOf ?superClass .
        ?subProperty rdfs:subPropertyOf ?superProperty .
        ?property rdfs:domain ?domain .
        ?property rdfs:range ?range .
    }
    WHERE {
      {
        ?entity a ?type .
        FILTER(?type IN (owl:Class, rdfs:Class, owl:ObjectProperty, owl:DatatypeProperty, rdf:Property))
      }
      UNION
      {
        ?subClass rdfs:subClassOf ?superClass .
        FILTER (?subClass != ?superClass)
      }
      UNION
      {
        ?subProperty rdfs:subPropertyOf ?superProperty .
        FILTER (?subProperty != ?superProperty)
      }
      UNION
      {
        ?property rdfs:domain ?domain .
      }
      UNION
      {
        ?property rdfs:range ?range .
      }
    }
    """,
)

In [49]:
graph.get_schema

'@prefix dc: <http://purl.org/dc/elements/1.1/> .\n@prefix dcterms: <http://purl.org/dc/terms/> .\n@prefix owl: <http://www.w3.org/2002/07/owl#> .\n@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n@prefix vann: <http://purl.org/vocab/vann/> .\n@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n\nrdf:XMLLiteral a rdfs:Class ;\n    rdfs:subClassOf rdfs:Literal .\n\nxsd:nonNegativeInteger a rdfs:Class .\n\nxsd:string a rdfs:Class .\n\n<http://proton.semanticweb.org/protonsys#transitiveOver> a rdf:Property .\n\ndc:creator a rdf:Property .\n\ndc:description a rdf:Property .\n\ndc:rights a rdf:Property .\n\ndc:title a rdf:Property .\n\ndcterms:contributor a rdf:Property .\n\ndcterms:created a rdf:Property .\n\ndcterms:description a rdf:Property .\n\ndcterms:identifier a rdf:Property .\n\ndcterms:modified a rdf:Property .\n\ndcterms:title a rdf:Property .\n\ndcterms:version a rdf:Property .\n\nvann:preferredNamespacePrefix

In [26]:
COMMON_PREFIXES = """
PREFIX agg: <http://jena.apache.org/ARQ/function/aggregate#>
PREFIX : <http://w3id.org/sepses/vocab/ref/attack#>
PREFIX sail: <http://www.openrdf.org/config/sail#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX fn: <http://www.w3.org/2005/xpath-functions>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX spif: <http://spinrdf.org/spif#>
PREFIX capec: <http://w3id.org/sepses/vocab/ref/capec#>
PREFIX path: <http://www.ontotext.com/path#>
PREFIX array: <http://www.w3.org/2005/xpath-functions/array>
PREFIX apf: <http://jena.apache.org/ARQ/property#>
PREFIX xml: <http://www.w3.org/XML/1998/namespace>
PREFIX rep: <http://www.openrdf.org/config/repository#>
PREFIX map: <http://www.w3.org/2005/xpath-functions/map>
PREFIX sr: <http://www.openrdf.org/config/repository/sail#>
PREFIX wgs: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX afn: <http://jena.apache.org/ARQ/function#>
PREFIX list: <http://jena.apache.org/ARQ/list#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ofn: <http://www.ontotext.com/sparql/functions/>
PREFIX geoext: <http://rdf.useekm.com/ext#>
PREFIX graphdb: <http://www.ontotext.com/config/graphdb#>
PREFIX math: <http://www.w3.org/2005/xpath-functions/math>
PREFIX omgeo: <http://www.ontotext.com/owlim/geo#>
"""

In [27]:
GRAPHDB_SPARQL_GENERATION_TEMPLATE = """
  Write a SPARQL SELECT query for querying a graph database.
  
  Use these predefined prefixes in your SPARQL queries:
  {prefixes}
  
  The ontology schema delimited by triple backticks in Turtle format is:
  ```
  {schema}
  ```
  
  Use only the classes and properties provided in the schema to construct the SPARQL query.
  Do not use any classes or properties that are not explicitly provided in the SPARQL query.
  Always do a case-insensitive and regex search for any properties related search. Eg: use `FILTER (regex(?description),"sql injection") `. 
  Include all necessary prefixes from the predefined list above.
  Do not include any explanations or apologies in your responses.
  Do not wrap the query in backticks.
  Do not include any text except the SPARQL query generated.
  
  The question delimited by triple backticks is:
  ```
  {prompt}
  ```
  """

GRAPHDB_SPARQL_GENERATION_PROMPT = PromptTemplate(
      input_variables=["schema", "prompt", "prefixes"],
      template=GRAPHDB_SPARQL_GENERATION_TEMPLATE,
  )

In [28]:
GRAPHDB_QA_TEMPLATE = """Task: Generate a natural language response from the results of a SPARQL query.
  You are an assistant that creates well-written and human understandable answers.
  The information part contains the information provided, which you can use to construct an answer.
  The information provided is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
  Make your response sound like the information is coming from an AI assistant, but don't add any information.
  Don't use internal knowledge to answer the question, just say you don't know if no information is available.
  Information:
  {context}
  
  Question: {prompt}
  Helpful Answer:"""
GRAPHDB_QA_PROMPT = PromptTemplate(
      input_variables=["context", "prompt"], template=GRAPHDB_QA_TEMPLATE
  )

In [30]:
def query_graph(user_input):
    chain = OntotextGraphDBQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True, 
        sparql_prompt =GRAPHDB_SPARQL_GENERATION_PROMPT,
        qa_prompt=GRAPHDB_QA_PROMPT,
        allow_dangerous_requests=True,  # Set to True to allow dangerous requests
        )
    result = chain.invoke(user_input)
    return result


In [47]:
query_1 = "can you tell me all the techniques under Initial Access?"
query_2 = "What are the techniques used for Privilege Escalation?"
query_3 = "Show 5 available tactics in the database"
query_4 = "What are the mitigations for the 'Data Encrypted for Impact' technique?"

In [48]:
query_graph(query_4)




[1m> Entering new OntotextGraphDBQAChain chain...[0m
Generated SPARQL:
[32;1m[1;3mPREFIX attack: <http://w3id.org/sepses/vocab/ref/attack#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?mitigation
WHERE {
  ?technique a attack:Technique ;
             rdfs:label "Data Encrypted for Impact" ;
             attack:hasMitigation ?mitigation .
}[0m

[1m> Finished chain.[0m


{'query': "What are the mitigations for the 'Data Encrypted for Impact' technique?",
 'result': "I'm sorry, but I don't have any information available regarding the mitigations for the 'Data Encrypted for Impact' technique."}