In [1]:
from dotenv import dotenv_values, load_dotenv
load_dotenv()


import os

api_key = os.getenv('PRACTICE_KEY')
langchain_api_key = os.getenv('LANGCHAIN_KEY')


os.environ["OPENAI_API_KEY"] = api_key
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["LANGCHAIN_PROJECT"] = "exp"

In [2]:
# Configure output
from IPython.display import HTML, display



def set_css():
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

In [3]:
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings


embed_model = OpenAIEmbedding()
Settings.embed_model = embed_model


In [6]:
from llama_index.vector_stores.elasticsearch import ElasticsearchStore



es = ElasticsearchStore(
    index_name="bhaiya_&_company",
    es_url="http://localhost:9200",
)

vector_store  = ElasticsearchStore(
    index_name="bhaiya_&_company",
    es_url="http://localhost:9200",
)



In [7]:
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

from llama_index.llms.azure_openai import AzureOpenAI
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

In [8]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o", temperature=0)

In [9]:
# %pip install -qU langchain-openai


In [11]:
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    Document,
    SimpleDirectoryReader,
)

In [12]:
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext
)

In [13]:
def load_service_context(embedding):

    service_context = ServiceContext.from_defaults(
        llm=None,
        embed_model=embedding,
    )

    return service_context

In [14]:
embedding = embed_model

In [15]:
# %pip install llama_index llama-index-llms-openai llama-index-indices-managed-vectara

In [16]:
service_context = load_service_context(embedding)

  service_context = ServiceContext.from_defaults(


LLM is explicitly disabled. Using MockLLM.


In [17]:
document_store = es # vector store

In [18]:
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.agents import AgentExecutor
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    HumanMessagePromptTemplate
)
from langchain_core.messages import SystemMessage
from langchain_core.agents import AgentFinish

In [19]:
system_prompt = '''You are a highly knowledgeable assistant for Bhaiya & Company, 

Given a user's question, your task is to generate a precise and informative response based on the relevant documents. Use the provided documents to ensure your answers are accurate and detailed.

Ensure that your responses are well-structured and directly address the user's query. If the information is not available in the provided documents, state that clearly.'''


prompt = ChatPromptTemplate.from_messages(
                [
                    SystemMessage(content=system_prompt),
                    MessagesPlaceholder(variable_name='chat_history', optional=True),
                    HumanMessagePromptTemplate.from_template("{input}"),
                    MessagesPlaceholder(variable_name='agent_scratchpad')
                ]
            )

In [20]:

storage_context = StorageContext.from_defaults(
    vector_store=document_store,
)


index = VectorStoreIndex.from_vector_store(
    vector_store=document_store,
    storage_context=storage_context,
    service_context=service_context,
)

In [21]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x16c493fdf60>

In [22]:
from langchain.tools import StructuredTool
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.core.vector_stores.types import MetadataInfo, VectorStoreInfo
from llama_index.core.vector_stores.types import VectorStoreQuerySpec
from pydantic import BaseModel
from typing import Optional, Dict


# Step 2: Define the metadata and vector store information
vector_store_info = VectorStoreInfo(
    content_info="Metadata of Bhaiya & Company 2023.pdf",
    metadata_info=[
        MetadataInfo(
            name="file_path",
            type="str",
            description="The path where the file is stored",
        ),
        MetadataInfo(
            name="file_name",
            type="str",
            description="The name of the file",
        ),
        MetadataInfo(
            name="original_file_name",
            type="str",
            description="The original name of the file before processing",
        ),
        MetadataInfo(
            name="file_size",
            type="int",
            description="The size of the file (in bytes), can be null",
        ),
        MetadataInfo(
            name="author",
            type="str",
            description="The author of the file, can be null",
        ),
        MetadataInfo(
            name="file_tags",
            type="list",
            description="Tags associated with the file, can be null",
        ),
        MetadataInfo(
            name="product_name",
            type="list",
            description="List of product names associated with the file",
        ),
        MetadataInfo(
            name="section",
            type="list",
            description="List of sections associated with the file",
        ),
        MetadataInfo(
            name="type_of_product",
            type="list",
            description="The types of products mentioned in the file",
        ),
        MetadataInfo(
            name="file_web_link",
            type="str",
            description="A web link to the file",
        ),
        MetadataInfo(
            name="parsed_with",
            type="str",
            description="The tool used to parse the file",
        ),
        MetadataInfo(
            name="year",
            type="int",
            description="The year the file was created or relevant to",
        ),
    ],
)

# # Assuming `index` is already defined elsewhere and contains the vector data

# # Step 3: Update the tool function to handle both query and filters
# def retrieve_metadata(query: str, filters: Optional[Dict] = None):
#     if filters is None:
#         filters = []  # Set to an empty dictionary if None
#     retriever = VectorIndexAutoRetriever(index, vector_store_info=vector_store_info)
#     query_spec = VectorStoreQuerySpec(query=query, filters=filters)
#     return retriever.retrieve(query_spec)

# # Step 4: Create the tool using StructuredTool
# metadata_retrieval_tool = StructuredTool(
#     name="bhaiya_company_metadata_retriever",  # Valid tool name
#     func=retrieve_metadata,
#     description="Retrieve metadata related to Bhaiya & Company 2023.pdf based on user queries",
#     args_schema=RetrieveMetadataInput  # Use the structured input model
# )

# # Now you can add this tool to your agent's toolset and use it as part of the Langchain agent execution
# tools = [metadata_retrieval_tool]


In [24]:
# %pip install llama-index-vector-stores-elasticsearch

In [23]:
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.core.vector_stores.types import MetadataInfo, VectorStoreInfo



retriever = VectorIndexAutoRetriever(
    index, vector_store_info=vector_store_info
)

In [24]:
query = 'about bhaiya and company Sales and Profit Forecast for 2022'

In [25]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [26]:
retriever.retrieve(query)

ValidationError: 2 validation errors for VectorStoreQuerySpec
query
  field required (type=value_error.missing)
filters
  field required (type=value_error.missing)

In [None]:
agent = create_tool_calling_agent(llm, tools, prompt)

In [None]:
agent_executor  = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
            # max_iterations=5,
            handle_parsing_errors=True,
            return_intermediate_steps=True
        )

In [27]:
from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text=(
            "A bunch of scientists bring back dinosaurs and mayhem breaks"
            " loose"
        ),
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    TextNode(
        text=(
            "Leo DiCaprio gets lost in a dream within a dream within a dream"
            " within a ..."
        ),
        metadata={
            "year": 2010,
            "director": "Christopher Nolan",
            "rating": 8.2,
        },
    ),
    TextNode(
        text=(
            "A psychologist / detective gets lost in a series of dreams within"
            " dreams within dreams and Inception reused the idea"
        ),
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    TextNode(
        text=(
            "A bunch of normal-sized women are supremely wholesome and some"
            " men pine after them"
        ),
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    TextNode(
        text="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
]

In [29]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x16c493fdf60>

In [28]:
nodes 

[TextNode(id_='c92f639c-26f8-4fcf-8e49-ecbd8ec571ca', embedding=None, metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='A bunch of scientists bring back dinosaurs and mayhem breaks loose', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 TextNode(id_='08d665c0-2a37-4d74-9b13-36241e58820c', embedding=None, metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 TextNode(id_='7eb1fba0-e97f-48b5-bd99-850795cf1e64', e

In [30]:
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.elasticsearch import ElasticsearchStore

In [31]:
from llama_index.core.schema import TextNode

nodes = [
    TextNode(
        text=(
            "A bunch of scientists bring back dinosaurs and mayhem breaks"
            " loose"
        ),
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    TextNode(
        text=(
            "Leo DiCaprio gets lost in a dream within a dream within a dream"
            " within a ..."
        ),
        metadata={
            "year": 2010,
            "director": "Christopher Nolan",
            "rating": 8.2,
        },
    ),
    TextNode(
        text=(
            "A psychologist / detective gets lost in a series of dreams within"
            " dreams within dreams and Inception reused the idea"
        ),
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    TextNode(
        text=(
            "A bunch of normal-sized women are supremely wholesome and some"
            " men pine after them"
        ),
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    TextNode(
        text="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
]

In [32]:
vector_store = ElasticsearchStore(
    index_name="auto_retriever_movies", es_url="http://localhost:9200"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

INFO:elastic_transport.transport:GET http://localhost:9200/ [status:200 duration:0.209s]
GET http://localhost:9200/ [status:200 duration:0.209s]


In [42]:
index = VectorStoreIndex(nodes, storage_context=storage_context)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:elastic_transport.transport:HEAD http://localhost:9200/auto_retriever_movies [status:200 duration:0.016s]
HEAD http://localhost:9200/auto_retriever_movies [status:200 duration:0.016s]
INFO:elastic_transport.transport:PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.125s]
PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.125s]
INFO:elastic_transport.transport:PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.078s]
PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.078s]


In [40]:
from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)


filters = MetadataFilters(
    filters=[
        MetadataFilter(key="year", operator=FilterOperator.EQ, value="2019"),
    ]
)


In [43]:
retriever = index.as_retriever(filters=filters)
retriever.retrieve("movie ?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:elastic_transport.transport:POST http://localhost:9200/auto_retriever_movies/_search [status:200 duration:0.078s]
POST http://localhost:9200/auto_retriever_movies/_search [status:200 duration:0.078s]


[]

In [44]:

retriever = VectorIndexAutoRetriever(
    index, vector_store_info=vector_store_info, extra_filters=filters
)

In [45]:
retriever.retrieve(
    "What are 2 movies by Christopher Nolan were made before 2020?"
)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.vector_store.retrievers.auto_retriever.auto_retriever:Using query str: movies by Christopher Nolan
Using query str: movies by Christopher Nolan
INFO:llama_index.core.indices.vector_store.retrievers.auto_retriever.auto_retriever:Using filters: [('year', '<', '2020'), ('director', '==', 'Christopher Nolan')]
Using filters: [('year', '<', '2020'), ('director', '==', 'Christopher Nolan')]
INFO:llama_index.core.indices.vector_store.retrievers.auto_retriever.auto_retriever:Using top_k: 2
Using top_k: 2
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


ValueError: Vector Store only supports exact match filters. Please use ExactMatchFilter or FilterOperator.EQ instead.