# Use Case A: download, ingest & query all NVD CVEs in a graph using a manually crafted data schema (3/3)

## Query a Neo4j knowledge graph

## 1. Set context

In [13]:
import os
from dotenv import load_dotenv
from datetime import datetime
import json

def printDone():
    """Return a "DONE" marker stamped with the current local date and time.

    Returns:
        str: The current time rendered with the pattern
        "DONE - %A %d %B à %Hh%M et %S secondes".
    """
    # Format "now" directly with the completion-message pattern.
    return datetime.now().strftime("DONE - %A %d %B à %Hh%M et %S secondes")

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fallback settings for this notebook. `setdefault` only fills in keys that are
# still missing, so values coming from the environment or from the .env file
# loaded above take precedence instead of being silently overwritten.
_DEFAULT_SETTINGS = {
    "OLLAMA_URL": "http://ollama:11434",
    "embedding_model": "nomic-embed-text",
    "NEO4J_URI": "neo4j://neo4j:7687",
    "NEO4J_URI_BOLT": "bolt://neo4j:7687",
    "NEO4J_USERNAME": "neo4j",
    # Development fallback only: define NEO4J_PASSWORD in .env rather than
    # relying on this hardcoded value.
    "NEO4J_PASSWORD": "strongPassword1",
    "NEO4J_DB": "neo4j",
    "OLLAMA_MODELE_URL": "http://ollama:11434/v1/",
    "LLM_MODEL": "llama3.2:3b",
}
for _name, _value in _DEFAULT_SETTINGS.items():
    os.environ.setdefault(_name, _value)

printDone()

'DONE - Wednesday 20 November à 14h09 et 29 secondes'

#### Get graph DB access

In [2]:
from langchain.memory import ConversationBufferMemory
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate
from langchain_community.graphs import Neo4jGraph

# Neo4j connection settings, read from the process environment.
URI, USER, PWD, DB = (
    os.environ[key]
    for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_DB")
)

# Connect to the Neo4j database, then refresh the graph schema.
kg = Neo4jGraph(
    url=URI,
    username=USER,
    password=PWD,
    database=DB,
)
kg.refresh_schema()

printDone()

'DONE - Wednesday 20 November à 14h05 et 03 secondes'

### Get retriever

In [3]:
# Prompt text for Cypher generation. It carries two placeholders, {schema} and
# {question}, plus a handful of worked question -> Cypher examples over CVE/CWE
# nodes. Any other literal curly brace added to this text would be read as a
# template variable once it is wrapped in a PromptTemplate.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
    Instructions:
    Use only the provided relationship types and properties in the schema.
    Do not use any other relationship types or properties that are not provided.
    Do not invent answers.
 
    Schema:
    {schema}
    Note: Do not include any explanations or apologies in your responses.
    Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
    Examples: Here are a few examples of generated Cypher statements for particular questions:
    # How many vulnerabilities have phishing as a common weakness ?
    MATCH (cve:CVE)-[]-(cwe:CWE) WHERE cwe.Description CONTAINS 'phishing' OR cwe.Description CONTAINS 'Phishing' RETURN COUNT(cve) AS TotalVulnerabilities
    # How many CVE are related to privileges ?
    MATCH (cve:CVE) WHERE ANY(x IN cve.Description WHERE x CONTAINS 'privileges' OR x CONTAINS 'Privileges') RETURN COUNT(cve) AS TotalCVE
    # What is the CVE-2022-43958 about?
    MATCH (cve:CVE) WHERE cve.Name = "CVE-2022-43958" RETURN cve.Name AS Name, cve.Description[0] AS Description
    # What is the latest CVE related to phishing ?
    MATCH (cve:CVE) WHERE ANY(x IN cve.Description WHERE x CONTAINS 'phishing' OR x CONTAINS 'Phishing') RETURN cve.Published_Date AS Latest_CVE_Published_Date, cve.Name AS CVE_Name, cve.Description[0] as Description ORDER BY cve.Published_Date DESC LIMIT 1
   
    The question is:
    {question}"""

printDone()

'DONE - Wednesday 20 November à 14h05 et 03 secondes'

In [4]:
# Wrap the raw template text in a PromptTemplate declaring its two variables.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

printDone()

'DONE - Wednesday 20 November à 14h05 et 03 secondes'

### Set LLM access

In [5]:
# LLM client: both the Ollama endpoint and the model name come from the environment.
llm = OllamaLLM(
    base_url=os.environ["OLLAMA_URL"],
    model=os.environ["LLM_MODEL"],
)
printDone()

'DONE - Wednesday 20 November à 14h05 et 03 secondes'

In [6]:
# Conversation buffer configured with memory_key="history" and
# input_key="question" (the same input key later assigned to the QA chain).
memory = ConversationBufferMemory(
    input_key="question",
    memory_key="history",
)
printDone()

  memory = ConversationBufferMemory(memory_key="history", input_key="question")


'DONE - Wednesday 20 November à 14h05 et 03 secondes'

### Initialize the RAG pipeline

In [7]:
# Assemble the Cypher QA chain from the LLM, the graph connection, the custom
# Cypher-generation prompt and the conversation memory.
kg_qa = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=kg,
    verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT,
    memory=memory,
    # Explicit opt-in flag: pass a real boolean rather than the string "true",
    # which only works when the library coerces it to a bool.
    allow_dangerous_requests=True,
)
# Use "question" as the chain's input key, matching the memory's input_key.
kg_qa.input_key = "question"

printDone()

'DONE - Wednesday 20 November à 14h05 et 03 secondes'

### Query the graph

In [21]:
# Natural-language question submitted to the graph QA chain.
query_text = 'Tell me about CVE-2017-0144'

# NOTE(review): in the captured run the verbose trace shows an empty
# "Full Context" ([]), i.e. the generated Cypher matched nothing, so the printed
# answer was not built from graph rows — verify answers against the graph.
response = kg_qa.invoke(query_text)
answer_text = response["result"]
print(answer_text)

printDone()



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (cve:CVE) WHERE cve.Name = "CVE-2017-0144" RETURN cve.Name AS Name, cve.Description[0] AS Description[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
CVE-2017-0144 was a security vulnerability in OpenSSL, a widely used cryptographic library. The vulnerability allowed attackers to obtain sensitive information such as private keys and certificates stored on the system. It was discovered in January 2017 and was patched by updating OpenSSL to version 1.0.2h or later.


'DONE - Wednesday 20 November à 14h14 et 36 secondes'