In [1]:
from injector import Module, Binder, singleton, provider
from rag.manager.llm_manager import LLMManager
from rag.manager.embed_manager import EmbeddingManager
from rag.manager.vector_store_manager import VectorStoreManager
from rag.manager.index_manager import IndexManager
from rag.manager.node_manager import NodeManager
from rag.config import Config
from llama_index.core.storage import StorageContext

class ChatModule(Module):
    """Injector module that wires the RAG managers together.

    Each dependency is exposed through a method decorated with ``@singleton``
    and ``@provider``; ``configure`` therefore performs no explicit bindings.
    """

    @singleton
    @provider
    def provide_config(self) -> Config:
        """Return a ``Config`` built with no constructor arguments."""
        # Ensure this config is instantiated with the correct settings
        settings = Config()
        return settings

    @singleton
    @provider
    def provide_llm_manager(self, config: Config) -> LLMManager:
        """Return an ``LLMManager`` constructed from ``config``."""
        llm_manager = LLMManager(config)
        return llm_manager

    @singleton
    @provider
    def provide_embedding_manager(self, config: Config) -> EmbeddingManager:
        """Return an ``EmbeddingManager`` constructed from ``config``."""
        embedding_manager = EmbeddingManager(config)
        return embedding_manager

    @singleton
    @provider
    def provide_vector_store_manager(self, config: Config) -> VectorStoreManager:
        """Return a ``VectorStoreManager`` constructed from ``config``."""
        vector_store_manager = VectorStoreManager(config)
        return vector_store_manager

    @singleton
    @provider
    def provide_node_manager(self, config: Config) -> NodeManager:
        """Return a ``NodeManager`` constructed from ``config``."""
        node_manager = NodeManager(config)
        return node_manager

    @singleton
    @provider
    def provide_storage_context(
        self,
        vector_store_manager: VectorStoreManager,
        node_manager: NodeManager,
    ) -> StorageContext:
        """Assemble a ``StorageContext`` from the managers' stores.

        The vector store comes from ``vector_store_manager``; the document
        store and index store both come from ``node_manager``.
        """
        stores = {
            "vector_store": vector_store_manager.vector_store,
            "docstore": node_manager.doc_store,
            "index_store": node_manager.index_store,
        }
        return StorageContext.from_defaults(**stores)

    @singleton
    @provider
    def provide_index_manager(
        self,
        config: Config,
        storage_context: StorageContext,
        embedding_manager: EmbeddingManager,
    ) -> IndexManager:
        """Return an ``IndexManager`` built on the injected storage context.

        The embedding model is read from ``embedding_manager``; the local data
        path and progress flag are read from ``config``.
        """
        embed_model = embedding_manager.embedding_model
        data_path = config.LOCAL_DATA_PATH
        progress_flag = config.SHOW_PROGRESS
        return IndexManager(
            storage_context=storage_context,
            embed_model=embed_model,
            local_data_path=data_path,
            show_progress=progress_flag,
        )

    def configure(self, binder: Binder) -> None:
        """Intentionally a no-op: bindings come from the provider methods."""
        # The bindings are now handled by the provider methods
        return None

In [2]:
from injector import Injector
from rag.services.chat_service import ChatService  

# Build the injector from ChatModule; the later cells resolve their services from it.
injector = Injector([ChatModule()])

# Disabled scratch usage, kept for reference only (nothing below executes).
# chat_service = injector.get(ChatService)
# NOTE(review): the next line would bind an IndexManager to the name `chat_service`.
# chat_service = injector.get(IndexManager)

# message = "What are the current interest rates according to the bank documents?"
# response = chat_service.chat(message)

# print(response)


In [3]:
import pymupdf4llm

# Location of the PDF to ingest. This is a machine-specific absolute path;
# change it here (in one place) when running the notebook elsewhere.
PDF_PATH = r'c:\Users\shres\Desktop\DocParser.pdf'

# Read the PDF into LlamaIndex documents via pymupdf4llm's reader.
llama_reader = pymupdf4llm.LlamaMarkdownReader()
documents = llama_reader.load_data(PDF_PATH)

# Ingestion is done by the IndexManager, so the variable is named for what it
# holds rather than `chat_service` (that name is bound to a ChatService in the
# chat cell further down).
index_manager = injector.get(IndexManager)
index_manager.ingest(documents)

Successfully imported LlamaIndex
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf...
Processing c:\Users\shres\Desktop\DocParser.pdf..

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 250.00it/s]
Generating embeddings: 100%|██████████| 2/2 [00:21<00:00, 10.52s/it]
Generating embeddings: 0it [00:00, ?it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 527.85it/s]
Generating embeddings: 100%|██████████| 2/2 [00:11<00:00,  5.70s/it]
Generating embeddings: 0it [00:00, ?it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 803.35it/s]
Generating embeddings: 100%|██████████| 1/1 [00:10<00:00, 10.11s/it]
Generating embeddings: 0it [00:00, ?it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 377.93it/s]
Generating embeddings: 100%|██████████| 2/2 [00:11<00:00,  5.89s/it]
Generating embeddings: 0it [00:00, ?it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 1410.80it/s]
Generating embeddings: 100%|██████████| 1/1 [00:11<00:00, 11.89s/it]
Generating embeddings: 0it [00:00, ?it/s]
Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 1144.11it/s]
Generating embeddings

[Document(id_='6e2fff36-f878-4fa5-84e7-465af9027320', embedding=None, metadata={'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20230503011511Z', 'modDate': 'D:20230503011511Z', 'trapped': '', 'encryption': None, 'page': 1, 'total_pages': 19, 'file_path': 'c:\\Users\\shres\\Desktop\\DocParser.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='## DocParser: End-to-end OCR-free Information Extraction from Visually Rich Documents\n\nMohamed Dhouib[⋆][1][,][2[0000][−][0002][−][5587][−][1028]], Ghassen\nBettaieb[1[0000][−][0003][−][3314][−][867][X][]], and Aymen Shabou[1[0000][−][0001][−][8933][−][7053]]\n\n\n1 DataLab Groupe, Credit Agricole S.A, Montrouge, France\n2 Ecole polytechnique, Palaiseau, France\n```\n       mohamed.dhouib@polytechnique.edu[∗]\n\n```\n_{ghassen.bettaieb,aymen.shabou}@credit-agricole-sa.fr_\n```\n       https

In [4]:
# Question to send to the chat service.
message = "What is docparser?"

# Resolve both services from the injector; `inject_service` is not used
# anywhere else in this cell.
chat_service = injector.get(ChatService)
inject_service = injector.get(IndexManager)

# Keep the reply in `response` so that a later cell can display it.
response = chat_service.chat(message)


Retrieved 5 nodes
Node 1:
  Content: 2 M. Dhouib et al.

of all, these approaches need positional annotations along with textual annotati...
  Score: 0.754372
Node 2:
  Content: DocParser: End-to-end OCR-free Information Extraction from Visually Rich Documents

Mohamed Dhouib[⋆...
  Score: 0.6434012
Node 3:
  Content: 6 Conclusion

We have introduced DocParser, a fast end-to-end approach for information extraction fr...
  Score: 0.6013237
Node 4:
  Content: DocParser 13

**Table 5. The effect of decreasing the width of the feature map in various**
**stages...
  Score: 0.5684308
Node 5:
  Content: 16 M. Dhouib et al.

to-end models, DocParser’s encoder is specifically designed to capture both
int...
  Score: 0.5668872
Error occurred: timed out


In [7]:
# Display the value returned by `chat_service.chat(message)` in the chat cell.
response

'I apologize, but an error occurred while processing your request. Please try again or contact support if the issue persists.'