#### RAG (Retrieval-Augmented Generation) with LangChain
This component tests RAG with LangChain on a PDF document.


In [None]:
from dotenv import load_dotenv
import os
from langchain_google_vertexai import VertexAIEmbeddings
from google.oauth2 import service_account
import json
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.vectorstores import InMemoryVectorStore
load_dotenv()


# Vertex AI / Gemini configuration.
# All values are read from the process environment, which load_dotenv() has
# already been asked to populate.
GCP_PROJECT_ID = os.getenv("GCP_PROJECT_ID")
GCP_REGION = os.getenv("GCP_REGION")
GCP_CREDENTIALS_JSON = os.getenv("GCP_CREDENTIALS_JSON")
GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail early with an actionable message: without this guard a missing variable
# reaches json.loads(None), which raises an opaque TypeError.
if not GCP_CREDENTIALS_JSON:
    raise RuntimeError(
        "GCP_CREDENTIALS_JSON is not set. Define it in the environment or in "
        "the .env file with the service-account key as a JSON string."
    )

try:
    _service_account_info = json.loads(GCP_CREDENTIALS_JSON)
except json.JSONDecodeError as err:
    # Do not echo the value itself: it contains a private key.
    raise ValueError("GCP_CREDENTIALS_JSON does not contain valid JSON.") from err

# Build the service-account credentials, then derive a copy restricted to the
# cloud-platform scope for the Vertex AI embeddings client.
credentials = service_account.Credentials.from_service_account_info(
    _service_account_info
)
scoped_creds = credentials.with_scopes(
    ["https://www.googleapis.com/auth/cloud-platform"]
)

# Chat model, authenticated with the Gemini API key.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    google_api_key=GEMINI_API_KEY,
)

# Embedding model, authenticated with the scoped service-account credentials.
embeddings = VertexAIEmbeddings(
    model="text-embedding-004",
    project=GCP_PROJECT_ID,
    location=GCP_REGION,
    credentials=scoped_creds,
)


In [8]:
from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader
pdf_path = Path.cwd().parents[3] / "licitaciones/licitacion_aifa.pdf"
loader = PyPDFLoader(file_path=pdf_path)
pages = []
async for page in loader.alazy_load():
    pages.append(page)


In [25]:
# Vector search over the loaded PDF pages.
# Index the documents in an in-memory vector store using the embedding model.
vector_store = InMemoryVectorStore.from_documents(
    documents=pages,
    embedding=embeddings,
)

# Retrieve the 3 documents most similar to the question.
query = "cuáles son las fechas importantes de la licitación?"
docs = vector_store.similarity_search(query, k=3)

# Show each hit's metadata and the first 300 characters of its text.
for doc in docs:
    print(f"metadata: {doc.metadata} content: {doc.page_content[:300]}\n")


metadata: {'producer': 'Adobe Acrobat Pro DC 19 Paper Capture Plug-in', 'creator': 'PyPDF', 'creationdate': '2025-01-27T21:36:14-06:00', 'moddate': '2025-01-27T21:36:14-06:00', 'source': '/home/angel/Documentos/licitacion-python-venv/licitacion-app-python/licitaciones/licitacion_aifa.pdf', 'total_pages': 102, 'page': 15, 'page_label': '16'} content: LICITACIÓN PÚBLICA DE CARÁCTER NACIONAL ELECTRÓNICA. 
 
SERVICIO DE MANTENIMIENTO Y CONSERVACIÓN A EQUIPOS DE RESPALDO DE ENERGÍA UPS 
ESTÁTICOS. 
 
 
Hoja 16 de 102 
 
 
Junta de aclaraciones. 
Día: 29 mes: Enero año: 2025. hora: 09:00 hrs. 
Medio: Compranet. 
 
Acto de presentación y apertura de p

metadata: {'producer': 'Adobe Acrobat Pro DC 19 Paper Capture Plug-in', 'creator': 'PyPDF', 'creationdate': '2025-01-27T21:36:14-06:00', 'moddate': '2025-01-27T21:36:14-06:00', 'source': '/home/angel/Documentos/licitacion-python-venv/licitacion-app-python/licitaciones/licitacion_aifa.pdf', 'total_pages': 102, 'page': 2, 'page_label': '3'} conte