In [None]:
import os
from eazyml_genai.components import PDFLoader
from eazyml_genai.components import PineconeDB
from eazyml_genai.components import GoogleGM
from eazyml_genai.components.vector_embedder.huggingface_embedder import (
            HuggingfaceEmbedderModel,
            HuggingfaceEmbedderProcessor
)

## Load data

### Read PDF documents
Process PDF documents containing unstructured data into semi-structured data (JSON format).

In [None]:
# Build the PDF loader and read the sample paper into `documents`.
# NOTE(review): `max_chunk_words` presumably caps the number of words per
# chunk -- confirm against the PDFLoader documentation.
pdf_loader = PDFLoader(
    max_chunk_words=1000,
)
documents = pdf_loader.load(
    file_path=r'data/yolo.pdf',
)

## Index data

### Index Document

In [None]:
# Create the Pinecone client; the API key is read from the PINECONE_API_KEY
# environment variable (None is passed through if the variable is unset).
#
# Alternative: point at an instance served on localhost instead:
#   pine_db = PineconeDB(api_key="pclocal", host="http://localhost:5080")
pine_db = PineconeDB(
    api_key=os.environ.get("PINECONE_API_KEY"),
)

In [None]:
# Index the loaded documents into the 'yolo' collection, using the
# ALL_MINILM_L6_V2 Hugging Face model as the text embedding model.
collection_name = 'yolo'
indexed_documents = pine_db.index_documents(
    collection_name=collection_name,
    documents=documents,
    text_embedding_model=HuggingfaceEmbedderModel.ALL_MINILM_L6_V2,
)

### Query document

In [None]:
# Ask the vector store for the entries relevant to the question.
# NOTE(review): no `collection_name` is passed here, unlike the
# `index_documents` call -- presumably the client reuses the collection it
# indexed last; confirm against the PineconeDB.retrieve_documents signature.
question = "number of convolutional layer"
results = pine_db.retrieve_documents(question=question)

## Generate Answer using Generative Model

In [None]:
# Keep only the 'metadata' entry of each retrieved result; these become the
# payloads handed to the generative model.
payloads = [hit['metadata'] for hit in results]

In [None]:
# Create the Gemini-backed generative model; the API key is read from the
# GEMINI_API_KEY environment variable.
google_gm = GoogleGM(
    model="gemini-2.0-flash",
    api_key=os.environ.get('GEMINI_API_KEY'),
)

# Generate an answer for the question from the retrieved payloads. With
# `show_token_details=True` the call is unpacked into three values: the
# response plus the input and output token figures.
response, input_tokens, output_tokens = google_gm.predict(
    question=question,
    payloads=payloads,
    show_token_details=True,
)

In [None]:
# Run the raw model response through the model's own parser.
parsed_response = google_gm.parse(
    response=response,
)

In [None]:
# Show the result of `google_gm.parse` as this cell's output.
parsed_response

In [None]:
# Show `input_tokens`, the second value returned by `google_gm.predict`.
input_tokens