In [31]:
from langchain_community.graphs import OntotextGraphDBGraph
from langchain.chains.graph_qa.ontotext_graphdb import OntotextGraphDBQAChain
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from IPython.display import display, Markdown

import os

In [71]:
# Chat model shared by the SPARQL-generation and answer-generation steps.
# The API key is taken from the environment so it never appears in the notebook.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o")


In [33]:
# GraphDB credentials are expected in the environment (GRAPHDB_USERNAME /
# GRAPHDB_PASSWORD). NOTE(review): per the LangChain GraphDB integration docs they
# are only required when the repository has security enabled - confirm for the
# installed version.
#
# The previous self-assignment `os.environ[X] = os.environ.get(X)` was a no-op when
# X was set and raised TypeError when it was not (os.environ only accepts str
# values), so the notebook could not run against an unsecured repository.
# We only report what is missing and leave the environment untouched.
GRAPHDB_CREDENTIAL_VARS = ("GRAPHDB_USERNAME", "GRAPHDB_PASSWORD")
missing_graphdb_credentials = [
    name for name in GRAPHDB_CREDENTIAL_VARS if not os.environ.get(name)
]
if missing_graphdb_credentials:
    print(
        "GraphDB credentials not set: "
        + ", ".join(missing_graphdb_credentials)
        + ". Continuing without them; set both variables if the repository is secured."
    )

In [52]:
# SPARQL endpoint of the GraphDB repository queried by this notebook.
GRAPHDB_QUERY_ENDPOINT = "http://localhost:7200/repositories/cykg-rag-1"

# CONSTRUCT query that extracts the ontology (schema) triples from the repository:
#   * type declarations for OWL/RDFS classes and properties,
#   * subclass and subproperty links (reflexive ones are filtered out),
#   * property domains and ranges.
ONTOLOGY_CONSTRUCT_QUERY = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    CONSTRUCT {
        ?entity a ?type .
        ?subClass rdfs:subClassOf ?superClass .
        ?subProperty rdfs:subPropertyOf ?superProperty .
        ?property rdfs:domain ?domain .
        ?property rdfs:range ?range .
    }
    WHERE {
      {
        ?entity a ?type .
        FILTER(?type IN (owl:Class, rdfs:Class, owl:ObjectProperty, owl:DatatypeProperty, rdf:Property))
      }
      UNION
      {
        ?subClass rdfs:subClassOf ?superClass .
        FILTER (?subClass != ?superClass)
      }
      UNION
      {
        ?subProperty rdfs:subPropertyOf ?superProperty .
        FILTER (?subProperty != ?superProperty)
      }
      UNION
      {
        ?property rdfs:domain ?domain .
      }
      UNION
      {
        ?property rdfs:range ?range .
      }
    }
    """

# Graph wrapper handed to the QA chain; the schema comes from the CONSTRUCT query above.
graph = OntotextGraphDBGraph(
    query_ontology=ONTOLOGY_CONSTRUCT_QUERY,
    query_endpoint=GRAPHDB_QUERY_ENDPOINT,
)

In [None]:
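# Alternative (disabled): load the ontology schema from a local Turtle file instead of
# querying the repository. Use either `query_ontology` or `local_file`, not both.
# graph = OntotextGraphDBGraph(
#     query_endpoint="http://localhost:7200/repositories/cykg-rag-1",
#     local_file="../ontology/attack_ics_ontology.ttl",
# )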

In [62]:
# Namespace prefixes advertised to the LLM, in the order they should appear.
PREFIX_NAMESPACES = {
    "dcterms": "http://purl.org/dc/terms/",
    "attack": "http://w3id.org/sepses/vocab/ref/attack#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
}

# One `PREFIX name: <iri>` declaration per line, with a leading and a trailing newline.
COMMON_PREFIXES = "\n" + "".join(
    f"PREFIX {prefix}: <{iri}>\n" for prefix, iri in PREFIX_NAMESPACES.items()
)

In [82]:
# Ganti template lama Anda dengan yang ini
GRAPHDB_SPARQL_GENERATION_TEMPLATE = f"""
You are an expert SPARQL query generator. Your task is to write a precise and correct SPARQL SELECT query for a graph database based on the provided schema and question.

**VERY IMPORTANT RULES:**
1.  **ALWAYS use `dcterms:title` to find an entity by its name.** This is the most critical rule. For example: `?tactic dcterms:title "Initial Access"`.
2.  **NEVER try to guess an entity's URI.** Always find it by its `dcterms:title`.
3.  Use the `attack:hasTechnique` property to link a Tactic to its Techniques.
4.  Only output the raw SPARQL query without explanations or backticks.

**COMMON PREFIXES YOU SHOULD USE:**
{COMMON_PREFIXES}

**ONTOLOGY SCHEMA:**
{{schema}}

**QUESTION:**
{{prompt}}

---
**EXAMPLES OF GOOD SPARQL QUERIES:**

**Example 1: Listing entities**
Question: "can you tell me all the techniques under Initial Access?"
SPARQL Query:
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX attack: <http://w3id.org/sepses/vocab/ref/attack#>
SELECT ?techniqueName
WHERE {{
  # First, find the Tactic by its exact title
  ?tactic a attack:Tactic ;
          dcterms:title "Initial Access" .
  
  # Then, find all techniques related to that tactic
  ?tactic attack:hasTechnique ?technique .
  
  # Finally, get the name of each technique
  ?technique dcterms:title ?techniqueName .
}}
ORDER BY ?techniqueName

**Example 2: Counting entities**
Question: "how many techniques for 'Initial Access'"
SPARQL Query:
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX attack: <http://w3id.org/sepses/vocab/ref/attack#>
SELECT (COUNT(?technique) as ?techniqueCount)
WHERE {{
  # First, find the Tactic by its exact title
  ?tactic a attack:Tactic ;
          dcterms:title "Initial Access" .
  
  # Then, find and count all techniques related to that tactic
  ?tactic attack:hasTechnique ?technique .
}}
"""

GRAPHDB_SPARQL_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "prompt"],
    template=GRAPHDB_SPARQL_GENERATION_TEMPLATE,
)

# Pastikan Anda meneruskan prompt yang sudah diperbarui ke chain Anda.
# Panggil kembali fungsi query_graph Anda dengan prompt baru ini.

In [78]:
GRAPHDB_QA_TEMPLATE = """Task: Generate a natural language response from the results of a SPARQL query.
  You are an assistant that creates well-written and human understandable answers.
  The information part contains the information provided, which you can use to construct an answer.
  The information provided is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
  Make your response sound like the information is coming from an AI assistant, but don't add any information.
  Don't use internal knowledge to answer the question, just say you don't know if no information is available.
  Information:
  {context}
  
  Question: {prompt}
  Helpful Answer:"""
GRAPHDB_QA_PROMPT = PromptTemplate(
      input_variables=["context", "prompt"], template=GRAPHDB_QA_TEMPLATE
  )

In [79]:
def query_graph(user_input):
    chain = OntotextGraphDBQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        return_intermediate_steps=True, 
        sparql_prompt =GRAPHDB_SPARQL_GENERATION_PROMPT,
        qa_prompt=GRAPHDB_QA_PROMPT,
        allow_dangerous_requests=True,  # Set to True to allow dangerous requests
        )
    result = chain.invoke(user_input)
    return result


In [83]:
# Sample questions used to exercise the QA chain (listing, counting, limited listings).
query_1, query_2, query_3, query_4 = (
    "can you tell me all the techniques under Initial Access?",
    "how many techniques for 'Initial Access'",
    "Show 5 available tactics in the database",
    "List 10 MITRE ATT&CK Techniques",
)

In [84]:
# Run the "Show 5 available tactics" question through the QA chain; the returned
# mapping is the cell's last expression, so it is displayed as the cell output.
query_graph(query_3)




[1m> Entering new OntotextGraphDBQAChain chain...[0m
Generated SPARQL:
[32;1m[1;3mPREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX attack: <http://w3id.org/sepses/vocab/ref/attack#>

SELECT ?tactic
WHERE {
  ?tactic rdf:type attack:Tactic .
}
LIMIT 5[0m

[1m> Finished chain.[0m


{'query': 'Show 5 available tactics in the database',
 'result': 'Here are five available tactics in the database:\n\n1. Discovery\n2. Execution\n3. Inhibit-response-function\n4. Impact\n5. Evasion'}