In [13]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [14]:
os.environ['GEMINI_API_KEY']=os.getenv("GEMINI_API_KEY")
os.environ['GROQ_API_KEY']=os.getenv("GROQ_API_KEY")
os.environ['PINECONE_API_KEY']=os.getenv("PINECONE_API_KEY")

In [15]:
from llama_index.llms.groq import Groq
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import pinecone
from pinecone import Pinecone
from llama_index.core import Settings

In [16]:
llm=Gemini(api_key=os.environ["GEMINI_API_KEY"])
embed_model=GeminiEmbedding(model_name="models/embedding-001",api_key=os.environ["GEMINI_API_KEY"])
pinecone_client=Pinecone(api_key=os.environ["PINECONE_API_KEY"])

In [18]:
# Settings.llm=llm
# Settings.embed_model=embed_model
# Settings.chunk_size=1024

In [19]:
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import VectorStoreIndex

In [7]:
from llama_index.core import SimpleDirectoryReader
documents=SimpleDirectoryReader("data").load_data()

In [8]:
index_description=pinecone_client.describe_index("project")
pinecone_index=pinecone_client.Index("project")
vector_store=PineconeVectorStore(pinecone_index=pinecone_index)

In [9]:
pipeline=IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1024,chunk_overlap=20),
        embed_model
    ],
    vector_store=vector_store
)

In [20]:
pipeline.run(documents=documents)

Upserted vectors: 100%|██████████| 10/10 [00:03<00:00,  2.65it/s]


[TextNode(id_='92e59bc5-dc77-472e-b5e2-4f33f6e5ae65', embedding=[0.03512014, -0.023302969, -0.033722185, -0.0076261335, 0.05115909, 0.018954925, -0.03531946, 0.014232365, -0.011457094, 0.0066697747, -0.009186224, 0.052005608, -0.009639233, -0.02142512, -0.007075144, -0.022549177, 0.021980973, -0.0032962107, -0.011684438, -0.010754321, 0.016952932, -0.041979887, -0.0012020953, -0.080813594, 0.01052703, -0.019593427, -0.039803237, -0.038493928, -0.08791418, 0.01711096, -0.055220257, -0.002673336, -0.03128298, -0.008109223, 0.0124499, -0.052126553, -0.03470661, 0.02226503, 0.003386368, 0.03506847, 0.010733594, -0.03729494, -0.0002981723, -0.05429489, 0.015803065, -0.06220809, 0.01810126, 0.037228514, 0.0050366437, -0.044542395, -0.0071428195, -0.020814044, 0.05562831, -0.039411847, 0.010304144, 0.014166183, 0.02300065, 0.02846234, 0.03167051, 0.023075497, -0.0160939, 0.030694278, -0.009493749, 0.008039559, -0.010506266, -0.061969966, -0.046188317, 0.010310309, 0.047903035, -0.032966953, 0

In [21]:
index=VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever=VectorIndexRetriever(index=index)
query_engine=RetrieverQueryEngine(retriever=retriever)

In [22]:
response=query_engine.query("show me graph in the pdf given")

In [24]:
response

Response(response='The provided text mentions three figures: Figure 7, a scatter plot with a regression line; Figure 8, a decision tree visualization; and Figure 9, showing the integration of a model with an app.  There is no information about the actual appearance of these graphs.\n', source_nodes=[NodeWithScore(node=TextNode(id_='52b70c20-486f-4eb7-9ae9-ec5d46fa7c65', embedding=[0.0293968748, -0.0119327307, -0.0816256255, 0.0104201604, 0.0752184615, 0.0125889769, 0.0109931603, -0.0259221233, -0.00163735019, 0.0366970152, -0.0388736054, 0.0354621895, -0.0162321068, 0.026580235, 0.0116839437, -0.0328737758, 0.0407130755, 0.0130762774, -0.00518555287, 0.00717762765, -0.0154356128, 0.0032990186, 0.00992370117, -0.0230232142, -0.00168434321, -0.0135845374, -0.00143406016, -0.0677460656, -0.0613884889, 0.00536772329, -0.0442059413, 0.0117428415, -0.0446222946, 0.0129991183, 0.0106111495, -0.0255438946, -0.0323183537, 0.0105528226, -0.00373764476, 0.0182670578, 0.0181515291, -0.00421331776,