# RAG Retrieve Context

Once the data has been uploaded into the vector DB, the next step is to connect to an LLM and start retrieving context based on the user's query.

## Import libraries

In [1]:
from qdrant_client import QdrantClient, models
import requests

import sys

sys.path.append("..")

from rag_llm_energy_expert.credentials import get_qdrant_config, generate_id_token
from rag_llm_energy_expert.config import GCPConfig

In [2]:
# Qdrant settings: this notebook reads URL, API_KEY, COLLECTION_NAME and COLLECTION_VERSION from it
qdrant_config = get_qdrant_config()
# GCP settings: this notebook reads EMBEDDING_SERVICE_URL from it
gcp_config = GCPConfig()

In [3]:
# Client used for every vector DB call below; the API key is unwrapped with get_secret_value() only here
qdrant_client = QdrantClient(
    url=qdrant_config.URL,
    api_key=qdrant_config.API_KEY.get_secret_value(),
)

In [4]:
def process_query_results(results: list) -> str:
    """
    Concatenate the text payload of every retrieved point into a single string.

    Args:
        results: Query responses, one per query vector. Each response must
            expose a `.points` sequence whose items carry a `.payload` mapping
            (or None).

    Returns:
        The "text" payload of each point, in the order received, each one
        followed by a blank line (two newline characters). An empty string is
        returned when there are no usable points.
    """
    text_chunks = []

    for query_response in results:
        for point in query_response.points:
            # A point may come back without a payload or without a "text" field;
            # skip it instead of failing the whole retrieval.
            payload = point.payload or {}
            text = payload.get("text")
            if text is None:
                continue
            text_chunks.append(text + "\n\n")

    # Join once at the end instead of growing a string inside the loop.
    return "".join(text_chunks)


def semantic_search(
        vectors:list,
        collection_name:str, 
        limit:int
        ) -> str:
    """
    Run a batched similarity search and return the retrieved text as one string.

    One `models.QueryRequest` is built per query vector, the whole batch is sent
    through the module-level `qdrant_client`, and the responses are flattened
    with `process_query_results`.

    Args:
        vectors: Query embeddings, one per chunk created from the user's query.
        collection_name: Name of the collection to search.
        limit: Value passed as `limit` to every `QueryRequest`.

    Returns:
        The concatenated text of the retrieved points, or an empty string when
        `vectors` is empty.
    """
    # Nothing to search for: skip the round trip to the vector DB.
    if len(vectors) == 0:
        return ""

    # Create a list of QueryRequest, each QueryRequest object is related to each chunk created from the query
    search_queries = [
        models.QueryRequest(
            query=vector,
            with_payload=True,    # the payload carries the text that is returned
            with_vector=False,    # the stored vectors are not needed by the caller
            limit=limit,
        )
        for vector in vectors
    ]

    # Get a list of results from the query batch
    results = qdrant_client.query_batch_points(
        collection_name=collection_name,
        requests=search_queries,
    )

    return process_query_results(results)

In [5]:
# Sample user query to embed and search with
query = "Se establecen mecanismos legales efectivos para prevenir"

In [6]:
# Request body for the embedding service: the raw text to embed
payload = {
    "text": query,
}

# ID token generated with the service URL as audience, sent as a bearer token
token = generate_id_token(audience=gcp_config.EMBEDDING_SERVICE_URL)
headers = {"Authorization": f"Bearer {token}"}

response = requests.post(
    url=gcp_config.EMBEDDING_SERVICE_URL + "/embed-text",
    json=payload,
    headers=headers,
    timeout=60,  # requests has no default timeout; without one a stalled service hangs the kernel
)
# Fail here on a 4xx/5xx answer rather than later while parsing the body
response.raise_for_status()

In [7]:
# Preview only the start of the body: the full response holds the whole embedding vector
response.text[:500]

'{"chunks":[{"vector_id":"973298cd-999b-48f0-a78a-7109dcba6da7","vector":[0.0047289286740124226,0.09051237255334854,-0.09641570597887039,-0.08437274396419525,-0.05107889696955681,0.04406353458762169,0.014904397539794445,0.0817609652876854,-0.028519457206130028,0.10596425831317902,0.09042844921350479,-0.004646106157451868,-0.025427252054214478,0.05579032748937607,0.005898213945329189,-0.004864892922341824,0.04255739971995354,0.023159107193350792,-0.003328348509967327,0.092003233730793,0.0386924222111702,-0.0006987742963247001,-0.06964496523141861,0.044377997517585754,-0.10041674971580505,-0.012088440358638763,-0.027638353407382965,-0.05524108186364174,0.0703388899564743,-0.03877624124288559,0.01806848682463169,-0.00536686647683382,0.07843383401632309,0.050226371735334396,-0.03808055818080902,-0.034424714744091034,0.08294550329446793,-0.07974183559417725,-0.04581555351614952,0.0502239391207695,-0.05966775864362717,-0.06526314467191696,-0.08734866976737976,-0.020728779956698418,-0.0141292

In [8]:
# Parse the JSON body and keep the list stored under "chunks"
embeddings = response.json()["chunks"]

In [9]:
# Keep only the embedding vector of each chunk
vectors = [chunk["vector"] for chunk in embeddings]

In [10]:
# Length (dimensionality) of the first embedding vector
len(vectors[0])

384

In [11]:
# Number of vectors returned for the query (one per chunk)
len(vectors)

1

In [12]:
# One QueryRequest per query vector: payload included, stored vector left out, 5 hits each
search_queries = [
    models.QueryRequest(
        query=query_vector,
        with_payload=True,
        with_vector=False,
        limit=5,
    )
    for query_vector in vectors
]

search_queries

[QueryRequest(shard_key=None, prefetch=None, query=[0.0047289286740124226, 0.09051237255334854, -0.09641570597887039, -0.08437274396419525, -0.05107889696955681, 0.04406353458762169, 0.014904397539794445, 0.0817609652876854, -0.028519457206130028, 0.10596425831317902, 0.09042844921350479, -0.004646106157451868, -0.025427252054214478, 0.05579032748937607, 0.005898213945329189, -0.004864892922341824, 0.04255739971995354, 0.023159107193350792, -0.003328348509967327, 0.092003233730793, 0.0386924222111702, -0.0006987742963247001, -0.06964496523141861, 0.044377997517585754, -0.10041674971580505, -0.012088440358638763, -0.027638353407382965, -0.05524108186364174, 0.0703388899564743, -0.03877624124288559, 0.01806848682463169, -0.00536686647683382, 0.07843383401632309, 0.050226371735334396, -0.03808055818080902, -0.034424714744091034, 0.08294550329446793, -0.07974183559417725, -0.04581555351614952, 0.0502239391207695, -0.05966775864362717, -0.06526314467191696, -0.08734866976737976, -0.02072877

In [13]:
# Full collection name = base name followed by the version, both taken from the Qdrant config
collection_name = qdrant_config.COLLECTION_NAME + qdrant_config.COLLECTION_VERSION

# Send all the query requests to the collection as a single batch
results = qdrant_client.query_batch_points(collection_name=collection_name, requests=search_queries)

In [14]:
# Flatten the batch results with the process_query_results helper from the earlier cell
# instead of repeating its loop inline
full_text = process_query_results(results)

In [15]:
# print() renders the newlines, so the retrieved chunks are shown as readable text
print(full_text)

ación en las utilidades del proyecto. 
• La Legislación prevé mecanismos alternativos en caso de que no se alcancen acuerdos en la 
etapa de negociación inicial. En primera instancia, contempla un proceso de mediación, a car-
go de la Secretaría de Desarrollo Territorial y Urbano, y en última instancia, prevé la figura de 
la “Servidumbre Legal de Hidrocarburos”, la cual podrá ser decretada por la vía judicial o ad-
ministrativa.
IX. 	 Medidas efectivas de transparencia y combate a la corrupción
Un modelo energético agotado como el que prevalecía en México previo a la Reforma, no sólo 
genera ineficiencias de productividad, sino incentivos poco adecuados para una industria ener-
gética transparente y abierta a

ética transparente y abierta a la rendición de cuentas.
Características del nuevo modelo
• Se establecen mecanismos legales efectivos para prevenir, identificar y sancionar a quienes rea-
licen actos u omisiones que constituyan conductas ilícitas o prácticas indebidas, para obte

In [16]:
collection_name = qdrant_config.COLLECTION_NAME + qdrant_config.COLLECTION_VERSION

query = "En el nuevo modelo, cómo se considera a Pemex?"
# Passage the search is expected to surface for this query:
# "El modelo propuesto también considera que Pemex podrá migrar a la nueva modalidad"
payload = {
    "text": query,
}

# ID token generated with the service URL as audience, sent as a bearer token
token = generate_id_token(audience=gcp_config.EMBEDDING_SERVICE_URL)
headers = {"Authorization": f"Bearer {token}"}

response = requests.post(
    url=gcp_config.EMBEDDING_SERVICE_URL + "/embed-text",
    json=payload,
    headers=headers,
    timeout=60,  # requests has no default timeout; without one a stalled service hangs the kernel
)
# Fail here on a 4xx/5xx answer rather than later while parsing the body
response.raise_for_status()

embeddings = response.json()["chunks"]
vectors = [chunk["vector"] for chunk in embeddings]

# Full retrieval path: query embeddings -> batched search -> concatenated text
context = semantic_search(vectors, collection_name=collection_name, limit=5)

print(context)

 elegir al socio 
(incluyendo las asignaciones de la Ronda Cero).
• El modelo propuesto también considera que Pemex podrá migrar a la nueva modalidad de 
contratación, aquellos contratos que en el pasado fueron otorgados por medio de una licita-
ción. 
• Para asegurar el desarrollo eficiente del sector y la existencia de un mercado donde exista 
competencia, los contratos para la industria de hidrocarburos no podrán dar exclusividad  o 
preferencia ilícita que beneficie a organizaciones empresariales, sociales o sindicales.
• Toda la información geológica del país será entregada a CNH, quien la administrará en el Cen-
tro Nacional de Información de Hidrocarburos. Pemex y las empresas productivas del Estado 
y los

raprestación para lo-
grar el mayor beneficio para el desarrollo de largo plazo del país. 
• Toda la información de los contratos estará disponible para todos los mexicanos.
• Pemex y los particulares podrán reportar a inversionistas y reguladores, así como al públi-
co en ge