In [1]:
import os

import nest_asyncio
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.readers.file import PyMuPDFReader
from openai import AsyncOpenAI

from rag_app.config.settings import settings
from rag_app.core.vector_client import VectorClient
from rag_app.embeddings import get_chunk_embeddings, get_embed_model
from rag_app.services.ingest import IngestionService

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import nest_asyncio
nest_asyncio.apply()

reader = PyMuPDFReader()
chunk_embed_model = get_chunk_embeddings()
embedding_client = get_embed_model()

client = AsyncOpenAI(base_url=settings.local_models.completion_base_url, api_key="test")

vector_client = VectorClient(
    api_key=settings.pinecone.api_key,
    environment=settings.pinecone.environment,
    index_name=settings.pinecone.index_name,
    dimension=settings.pinecone.dimension,
    metric=settings.pinecone.metric,
    cloud=settings.pinecone.cloud,
    region=settings.pinecone.region,
)


node_parser = SemanticSplitterNodeParser(embed_model=chunk_embed_model)

ingest_service = IngestionService(
    reader=reader,
    node_parser=node_parser,
    client=client,
    vector_client=vector_client,
    embedding_client=embedding_client,
)

nodes = await ingest_service.ingest(file_path="C:/Users/mrudh/Documents/Data/CWMG-KS-Vol-001-I.pdf")

[32m2025-10-21 07:10:24[0m | [1mINFO    [0m | [36mrag_app.embeddings[0m:[36mget_chunk_embeddings[0m:[36m23[0m - [1mUsing llama-index Local Embedding Model[0m
[32m2025-10-21 07:10:24[0m | [1mINFO    [0m | [36mrag_app.embeddings[0m:[36mget_embed_model[0m:[36m39[0m - [1mInitializing Local Models Embedding Model[0m
[32m2025-10-21 07:10:24[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_ingest_file[0m:[36m29[0m - [1mLoading documents from C:/Users/mrudh/Documents/Data/CWMG-KS-Vol-001-I.pdf[0m
[32m2025-10-21 07:10:30[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_ingest_file[0m:[36m31[0m - [1mLoaded 457 documents.[0m
[32m2025-10-21 07:10:30[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_preprocess_documents[0m:[36m44[0m - [1mStarting document preprocessing...[0m
[32m2025-10-21 07:10:30[0m | [1mINFO    [0m | [36mrag_app.services.ingest[0m:[36m_filter_empty_documents[0m:[36m39[0m - [1mFiltere

In [4]:
# Spot-check the ingest result: display the metadata of the first item in
# `nodes` (the value returned by `ingest_service.ingest` in the cell above).
nodes[0].metadata

{'total_pages': 457,
 'file_path': 'C:/Users/mrudh/Documents/Data/CWMG-KS-Vol-001-I.pdf',
 'source': '32',
 'questions': ['Why did the white farmers fear the Indian traders?',
  'What were the advantages of the Indian traders?',
  'How did the Indian traders operate in comparison to the British and Dutch?',
  'What was the impact of the Indian traders on the prices of white farmer goods?',
  'What was the concern regarding the free entry and trade of the Indians into the country?']}