# Use Case B: ingest a PDF into a vector DB and query it (part 2/2)

## Query

### Context definition

Neo4j is used as the vector database (see the neo4j-graphrag RAG user guide):
https://neo4j.com/docs/neo4j-graphrag-python/current/user_guide_rag.html

In [10]:
import os
import re
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from dotenv import load_dotenv
from datetime import datetime

def printDone():
    """Build a timestamped completion message for the current moment.

    Despite its name, this function prints nothing: it returns the text,
    e.g. ``'DONE - Wednesday 20 November à 16h56 et 25 secondes'``, so a
    notebook displays it when the call is the last expression of a cell.

    Returns:
        str: The "DONE - ..." message built from ``datetime.now()``.
    """
    stamp_format = "DONE - %A %d %B à %Hh%M et %S secondes"
    return datetime.now().strftime(stamp_format)

# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# Fallback settings for this notebook. They are applied with setdefault so a
# value already provided by the real environment or by the .env file loaded
# above is kept instead of being silently overwritten by these literals.
_DEFAULT_SETTINGS = {
    # Ollama server and embedding model
    "OLLAMA_URL": "http://ollama:11434",
    "embedding_model": "nomic-embed-text",
    # Neo4j connection and index names
    "NEO4J_URI": "neo4j://neo4j:7687",
    "NEO4J_URI_BOLT": "bolt://neo4j:7687",
    "NEO4J_USERNAME": "neo4j",
    # NOTE(review): development default only; supply the real password via
    # the environment or .env rather than committing it here.
    "NEO4J_PASSWORD": "strongPassword1",
    "INDEX_NAME": "pdf_chunk",
    "FULLTEXT_INDEX_NAME": "documentFullTextIndex",
    # OpenAI-compatible endpoint and chat model
    "OLLAMA_MODELE_URL": "http://ollama:11434/v1/",
    "LLM_MODEL": "llama3.2:3b",
}
for _name, _value in _DEFAULT_SETTINGS.items():
    os.environ.setdefault(_name, _value)

printDone()

'DONE - Wednesday 20 November à 16h56 et 25 secondes'

#### Get vector DB access

In [11]:
from neo4j import GraphDatabase
from neo4j_graphrag.retrievers import HybridRetriever
from langchain_openai import ChatOpenAI
from neo4j_graphrag.generation import GraphRAG
from neo4j_graphrag.embeddings import OpenAIEmbeddings

from langchain_ollama import OllamaEmbeddings

# Connection target and credentials are read from the environment.
URI = os.environ["NEO4J_URI"]
neo4j_user = os.environ["NEO4J_USERNAME"]
neo4j_password = os.environ["NEO4J_PASSWORD"]
AUTH = (neo4j_user, neo4j_password)

# Create the Neo4j driver used by the retriever below.
driver = GraphDatabase.driver(URI, auth=AUTH)

printDone()

'DONE - Wednesday 20 November à 16h56 et 26 secondes'

### Get Embedder object

In [12]:
# Embedding client: the Ollama base URL and the embedding model name are both
# taken from the environment.
embeddings_engin = OllamaEmbeddings(
    base_url=os.environ["OLLAMA_URL"],
    model=os.environ["embedding_model"],
)

printDone()

'DONE - Wednesday 20 November à 16h56 et 28 secondes'

### Get retriever

In [13]:
# Names of the two Neo4j indexes the hybrid retriever is given.
VECTOR_INDEX_NAME = os.environ["INDEX_NAME"]
FULLTEXT_INDEX_NAME = os.environ["FULLTEXT_INDEX_NAME"]

# Hybrid retriever built from the driver, both index names and the embedder.
retriever = HybridRetriever(
    driver=driver,
    vector_index_name=VECTOR_INDEX_NAME,
    fulltext_index_name=FULLTEXT_INDEX_NAME,
    embedder=embeddings_engin,
)

printDone()

'DONE - Wednesday 20 November à 16h56 et 29 secondes'

### Set LLM access

In [16]:
# Chat model client pointed at the URL stored in OLLAMA_MODELE_URL; the model
# name is read from LLM_MODEL.
llm = ChatOpenAI(
    api_key="ollama",
    base_url=os.environ["OLLAMA_MODELE_URL"],
    model_name=os.environ["LLM_MODEL"],
    temperature=0.1,
    verbose=True,
)

printDone()

'DONE - Wednesday 20 November à 16h58 et 33 secondes'

### Initialize the RAG pipeline

In [17]:
# RAG pipeline combining the retriever and the LLM created above.
rag = GraphRAG(
    retriever=retriever,
    llm=llm,
)

printDone()

'DONE - Wednesday 20 November à 16h58 et 37 secondes'

### Query the graph

Set query

In [18]:
# Question submitted to the RAG pipeline.
query_text = "Tell me about CVSS ?"

# Run the search, passing top_k=8 in the retriever configuration, and print
# the returned result object.
response = rag.search(
    query_text=query_text,
    retriever_config={"top_k": 8},
)
print(response)

printDone()

answer="CVSS (Common Vulnerability Scoring System) is a widely-used standard for measuring the severity and impact of vulnerabilities in software systems. It provides a standardized way to assess the risk associated with a vulnerability, allowing organizations to prioritize remediation efforts.\n\nThe CVSS v3.1 User Guide provides detailed information on how to use CVSS, including scoring metrics, temporal and environmental metrics, and guidelines for proper attribution and usage.\n\nCVSS is designed to measure the severity of a vulnerability, not the risk itself. It's essential to use CVSS in conjunction with other risk assessment methods to get a comprehensive understanding of the potential impact of a vulnerability.\n\nThe CVSS v3.1 Specification Document outlines the official specification for CVSS version 3.1 and provides guidance on how to score vulnerabilities using the CVSS scoring system.\n\nOverall, CVSS is an essential tool for organizations looking to assess and mitigate cy

'DONE - Wednesday 20 November à 17h00 et 34 secondes'