In [1]:
from dotenv import load_dotenv
import os
from llama_index.callbacks import LlamaDebugHandler
from llama_index.callbacks import CallbackManager

import pinecone
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.vector_stores import PineconeVectorStore
from pinecone import Pinecone

  from tqdm.autonotebook import tqdm


In [2]:
load_dotenv()

True

In [3]:
pinecone_client = Pinecone(
    api_key=os.environ["PINECONE_API_KEY"], 
    environment=os.environ["PINECONE_ENVIRONMENT"]
)

In [4]:
pinecone_index = "llamaindex-documentation-helper"

In [5]:
pinecone_index = pinecone_client.Index(index_name=pinecone_index,host=os.environ["PINECONE_HOST"])

In [6]:
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
vector_store

PineconeVectorStore(stores_text=True, is_embedding_query=True, flat_metadata=False, api_key=None, index_name=None, environment=None, namespace=None, insert_kwargs={}, add_sparse_vector=False, text_key='text', batch_size=100, remove_text_from_metadata=False)

In [7]:
llama_debug= LlamaDebugHandler(print_trace_on_end=True)
llama_debug

<llama_index.callbacks.llama_debug.LlamaDebugHandler at 0x22445fdf250>

In [8]:
callback_manager= CallbackManager(handlers=[llama_debug])
callback_manager

<llama_index.callbacks.base.CallbackManager at 0x22445f87d00>

In [9]:
service_context= ServiceContext.from_defaults(callback_manager=callback_manager)
service_context

ServiceContext(llm_predictor=LLMPredictor(system_prompt=None, query_wrapper_prompt=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>), prompt_helper=PromptHelper(context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None, separator=' '), embed_model=OpenAIEmbedding(model_name='text-embedding-ada-002', embed_batch_size=100, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x0000022445F87D00>, additional_kwargs={}, api_key='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', api_base='https://api.openai.com/v1', api_version='', max_retries=10, timeout=60.0, default_headers=None, reuse_client=True, dimensions=None), transformations=[SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x0000022445F87D00>, id_func=<function default_id_func at 0x0000022445B11990>, chunk_size=1024, chunk_overlap=200, separator=' ', paragraph_separator='\n\n

In [11]:
index= VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
index

**********
Trace: index_construction
**********


<llama_index.indices.vector_store.base.VectorStoreIndex at 0x22448794df0>

In [12]:
query = "What is a LlamaIndex query engine?"
query_engine= index.as_query_engine()
response = query_engine.query(query)
print(response)

**********
Trace: query
    |_query ->  4.761554 seconds
      |_retrieve ->  1.164686 seconds
        |_embedding ->  0.413293 seconds
      |_synthesize ->  3.596868 seconds
        |_templating ->  0.0 seconds
        |_llm ->  3.578571 seconds
**********
A LlamaIndex query engine is a component of the LlamaIndex system that allows users to perform queries on their indexes and graphs. It enables users to search for specific information or retrieve relevant data from the structured formats generated by LlamaIndex. The query engine is designed to handle both simple queries, such as semantic search, and more complex queries that involve composable graphs.
