## Import libraries

In [None]:
import os
from eazyml_genai.components import PDFLoader
from eazyml_genai.components import (
    QdrantDB,
)
from eazyml_genai.components import GoogleGM
from eazyml_genai.components.vector_embedder.huggingface_embedder import (
            HuggingfaceEmbedderModel,
            HuggingfaceEmbedderProcessor
)

## Load data

### Read PDF documents
Process PDF documents containing unstructured data into semi-structured data or JSON format.

In [None]:
# Build the loader and parse the sample PDF into `documents`.
# NOTE(review): `max_chunk_words` presumably bounds the words per chunk --
# confirm against the PDFLoader documentation.
chunk_word_limit = 1000
pdf_path = r'data/yolo.pdf'

pdf_loader = PDFLoader(max_chunk_words=chunk_word_limit)
documents = pdf_loader.load(file_path=pdf_path)

## Index data

### Index Document

In [None]:
# Create the vector store and index the loaded documents under one collection.
collection_name = 'yolo'
qdrant_db = QdrantDB(location=':memory:')

# Embedding models/processor handed to the indexer: one model for text,
# and a model + processor pair for images.
embedding_options = dict(
    text_embedding_model=HuggingfaceEmbedderModel.ALL_MINILM_L6_V2,
    image_embedding_model=HuggingfaceEmbedderModel.CLIP_VIT_BASE_PATCH32,
    image_embedding_processor=HuggingfaceEmbedderProcessor.CLIP_VIT_BASE_PATCH32,
)

qdrant_db.index_documents(
    collection_name=collection_name,
    documents=documents,
    **embedding_options,
)

### Query document

In [None]:
# Question to answer from the indexed collection.
question = 'Value of mAP score of Fast R-CNN on experimental result on pascal voc 2007 data'

# Number of results requested from the vector store.
hits_to_fetch = 5
total_hits = qdrant_db.retrieve_documents(
    collection_name,
    question,
    top_k=hits_to_fetch,
)

## Generate Answer using Generative Model

In [None]:
# Keep only the `payload` attribute of every retrieved hit, in retrieval order.
payloads = [
    retrieved.payload
    for retrieved in total_hits
]

In [None]:
# Create the generative-model client; the API key is read from the
# GEMINI_API_KEY environment variable (None when the variable is unset).
google_gm = GoogleGM(
    model="gemini-2.0-flash",
    api_key=os.environ.get('GEMINI_API_KEY'),
)

# Ask the model the question, passing the retrieved payloads along with it.
# With show_token_details=True the call result is unpacked into three values.
prediction = google_gm.predict(
    question=question,
    payloads=payloads,
    show_token_details=True,
)
response, input_tokens, output_tokens = prediction

In [None]:
# Run the raw model response through the client's parser, then print it.
parsed_response = google_gm.parse(response)
print(parsed_response)

In [None]:
# Display `output_tokens`, the third value unpacked from `google_gm.predict(...)`.
output_tokens

In [None]:
# Display `input_tokens`, the second value unpacked from `google_gm.predict(...)`.
input_tokens