In [None]:
!pip install llama-index-llms-groq
!pip install llama-index
!pip install llama-index-embeddings-ollama
!pip install llama-index-llms-ollama
!pip install llama-index-llms-openai-like
!pip install llama-index-vector-stores-chroma
!pip install llama-index-embeddings-huggingface
!pip install llama-index-llms-mistralai

In [1]:
# 1.0 Patch asyncio so that an event loop can be re-entered.
#     A Jupyter kernel already runs an event loop; the agent tools defined
#     later in this notebook are `async def` functions, so nested loop usage
#     has to be allowed before they are invoked.
import nest_asyncio
nest_asyncio.apply()

In [2]:
# 1.0 SimpleDirectoryReader can also load metadata from a dictionary
#     https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.readers.file import PagedCSVReader

# 1.1 The Settings is a bundle of commonly used resources used 
#     during the indexing and querying stage in a LlamaIndex workflow/application.
from llama_index.core import Settings

# 1.2 If using LocalAI
# https://docs.llamaindex.ai/en/stable/examples/llm/localai/
#from llama_index.llms.openai_like import OpenAILike

# 1.3 Ollama related
# https://docs.llamaindex.ai/en/stable/examples/embeddings/ollama_embedding/
from llama_index.embeddings.ollama import OllamaEmbedding
#from llama_index.llms.ollama import Ollama


# 1.4 Vector store related
import chromadb
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore

# 1.5 Misc
import os
import pandas as pd

In [3]:
# 2.0 Embedding model
#     Vectors are produced by an Ollama server on localhost:11434 using the
#     "nomic-embed-text" model (using a foundational model may be overkill).
#
#     Alternative that was tried (HuggingFaceEmbedding is not imported in this
#     notebook, so the import must be added before enabling it):
#         embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
#
#     Optional OllamaEmbedding arguments left disabled by the author:
#         dimensions=512
#         ollama_additional_kwargs={"mirostat": 0}

embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    base_url="http://localhost:11434",
)

# Register the embedder globally so that index construction picks it up.
Settings.embed_model = embed_model

In [4]:
# 2.1 (disabled) Alternative LLM: MistralAI.
#     The whole cell is one bare string literal, so none of the code inside it
#     runs; the cell merely echoes the string. The API key that used to be
#     embedded here has been replaced by an environment lookup -- never commit
#     credentials to a notebook, and treat the previously embedded key as
#     compromised (revoke/rotate it).
'''# 2.1 Settings can set the global configuration. Local configurations (transformations, LLMs, embedding models).
#     These can be passed directly into the interfaces that make use of them.

## Very very fast and Excellent
# pip install llama-index-llms-mistralai
from llama_index.llms.mistralai import MistralAI
llm = MistralAI(api_key=os.environ["MISTRAL_API_KEY"])
Settings.llm = llm'''

'# 2.1 Settings can set the global configuration. Local configurations (transformations, LLMs, embedding models).\n#     These can be passed directly into the interfaces that make use of them.\n\n## Very very fast and Excellent\n# pip install llama-index-llms-mistralai\nfrom llama_index.llms.mistralai import MistralAI\nllm = MistralAI(api_key=os.environ["MISTRAL_API_KEY"])\nSettings.llm = llm'

In [5]:
from llama_index.llms.groq import Groq

# 2.2 LLM used by the query engine and by the agent.
#     The API key is read from the GROQ_API_KEY environment variable instead of
#     being hard-coded; a missing variable fails fast with a KeyError naming it.
#     (The key that was previously embedded in this cell should be revoked.)
#
#     Model choice: the author noted that this LLM gives good results;
#     "llama-3.3-70b-versatile" is the alternative that was tried.
#     NOTE(review): the recorded agent runs further down show Groq rejecting
#     some tool calls with `tool_use_failed` -- it may be worth re-testing with
#     the alternative model; TODO confirm.
llm = Groq(
    model="qwen-qwq-32b",
    api_key=os.environ["GROQ_API_KEY"],
    temperature=0.5,
)

# Make this the default LLM for every LlamaIndex component.
Settings.llm = llm

In [6]:
# 3.0 Quick look at the raw CSV with pandas.
#     The DataFrame is only a visual sanity check and is not used afterwards;
#     `file_path`, however, is reused when the documents are loaded.

file_path = '/home/ashok/Downloads/rental2.csv'   # path of the csv file
# Alternative dataset:
# file_path = '/home/ashok/Downloads/csvrag/data/data.csv'
df = pd.read_csv(file_path)
df.head()   # first five rows

Unnamed: 0,Property Name,Area,City,furnishing,bathroom,tenent-preffered,status,carpet-area,super-area,floor,...,overlooking,Total Price,balcony,society,dimensions,owner-resides,plot-area,ownership,transaction,parking
0,"3 BHK House for Rent in Horamavu, Bangalore",Horamavu,Bangalore,Semi-Furnished,3.0,Bachelors,Immediately,0,2400 sqft,0,...,0,75000,4.0,0.0,0,0,0.0,0,0.0,0.0
1,"1 BHK House for Rent in Thambu Chetty Palya, B...",Thambu Chetty Palya,Bangalore,Semi-Furnished,2.0,Bachelors/Family,Immediately,0,700 sqft,1 out of 4,...,0,16000,0.0,0.0,0,0,0.0,0,0.0,0.0
2,"1 BHK House for Rent in Uttarahalli Main Road,...",Uttarahalli Main Road,Bangalore,Unfurnished,1.0,Bachelors,Immediately,200 sqft,0,3 out of 4,...,0,4500,0.0,0.0,30 X 40 ft Sqft,0,0.0,0,0.0,0.0
3,"3 BHK House for Rent in Koramangala, Bangalore",Koramangala,Bangalore,Semi-Furnished,3.0,Bachelors/Family,Immediately,2700 sqft,0,1 out of 2,...,0,1 Lac,3.0,0.0,0,0,0.0,0,0.0,0.0
4,3 BHK House for Rent in Sri Venkateshwara Layo...,Sri Venkateshwara Layout Munnekollal,Bangalore,Semi-Furnished,3.0,Bachelors/Family,From Apr '25,1000 sqft,0,1 out of 4,...,0,32000,1.0,0.0,0,0,0.0,0,0.0,0.0


In [7]:
# 4.0 Turn the CSV into LlamaIndex documents.
#     PagedCSVReader renders every row in an LLM-friendly format, one document
#     per row.
csv_reader = PagedCSVReader()

# 4.1 Route ".csv" files through that reader.
reader = SimpleDirectoryReader(
    input_files=[file_path],
    file_extractor={".csv": csv_reader},
)

# 4.2 Read the file.
docs = reader.load_data()

In [8]:
# 5.0 Chroma client
#     For a throw-away, in-memory database use instead:
#         chroma_client = chromadb.EphemeralClient()

# 5.1 Persistent client: collections are stored on disk in the folder given
#     below (per the author's note, as a chroma.sqlite3 database inside it).

chroma_client = chromadb.PersistentClient(
    path="/home/ashok/Documents/chroma_db",
)

In [9]:
# 5.2 Start from an empty "datastore" collection.
#     Collections are Chroma's grouping mechanism for embeddings, documents and
#     metadata; one database can hold several of them.
#
#     Depending on the chromadb release, list_collections() yields either
#     collection names (str) or Collection objects. Comparing a plain string
#     against Collection objects never matches, which would skip the delete and
#     make create_collection() fail on a re-run. Normalising every entry to its
#     name keeps the existence check correct in both cases.
existing_names = {
    getattr(entry, "name", entry) for entry in chroma_client.list_collections()
}

# Drop any previous copy so that re-running the notebook does not append to
# stale data, then create the collection afresh.
if "datastore" in existing_names:
    chroma_client.delete_collection("datastore")
chroma_collection = chroma_client.create_collection("datastore")

# 5.3 Get collection information:
chroma_collection

Collection(name=datastore)

In [10]:
# 6.0 Wrap the freshly created Chroma collection in a LlamaIndex vector store.
vector_store = ChromaVectorStore(
    chroma_collection=chroma_collection,
)

# 6.1 Storage context that directs the index to that vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [11]:
# 6.2 Build the index from the documents, writing into the storage context.
#     Caution: running this cell again doubles the number of vectors/records
#     in the collection.

index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
    show_progress=True,   # show progress bar
)

Parsing nodes:   0%|          | 0/1500 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1500 [00:00<?, ?it/s]

In [12]:
# 7.0 Query data
from llama_index.core.tools import QueryEngineTool

# 7.1 Query engine over the vector index.
vector_query_engine = index.as_query_engine()

# 7.2 Expose the query engine to the agent as a tool. The description is what
#     the LLM reads when choosing a tool, so it is phrased as an instruction.
desc = "You will be given the area and city name, and you job is to find and give the properties in those area or city respectively from the rental2.csv file not from the web."
read_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=desc,
    # return_direct=True
)

In [13]:
from tavily import AsyncTavilyClient

async def social_life_on_web(query: str) -> str:
    """Given the City, Locality and Property, search for any nightlife, cafes, gyms, parks, cultural activities, active social communities, networking events, coworking spaces, also areas best suited for young professionals, families, or students. You are not to search for any other information on the web."""
    # The docstring above is LLM-facing: FunctionTool.from_defaults() derives
    # the tool description from it by default, so it is kept verbatim.
    #
    # Args:    query -- free-text search string.
    # Returns: str() of the raw response returned by the Tavily search call.
    # Raises:  RuntimeError if the TAVILY_API_KEY environment variable is unset.
    #
    # The key is read from the environment rather than hard-coded; any key
    # previously embedded in this notebook should be revoked.
    api_key = os.environ.get("TAVILY_API_KEY")
    if not api_key:
        raise RuntimeError("TAVILY_API_KEY environment variable is not set")
    client = AsyncTavilyClient(api_key=api_key)
    return str(await client.search(query))

In [14]:
async def review_on_web(query: str) -> str:
    """Given the City, Locality and Property, search for Croudsourced reviews from the web."""
    # The docstring above is LLM-facing: FunctionTool.from_defaults() derives
    # the tool description from it by default, so it is kept verbatim.
    #
    # Args:    query -- free-text search string.
    # Returns: str() of the raw response returned by the Tavily search call.
    # Raises:  RuntimeError if the TAVILY_API_KEY environment variable is unset.
    #
    # The key is read from the environment rather than hard-coded; any key
    # previously embedded in this notebook should be revoked.
    api_key = os.environ.get("TAVILY_API_KEY")
    if not api_key:
        raise RuntimeError("TAVILY_API_KEY environment variable is not set")
    client = AsyncTavilyClient(api_key=api_key)
    return str(await client.search(query))

In [15]:
async def nearby_area_on_web(query: str) -> str:
    """Given the City, Locality and Property, search on the web for nearby restaurants, hospitals, schools, grocery stores with ratings, community engagement events, weekend activities, and local festivals."""
    # The docstring above is LLM-facing: FunctionTool.from_defaults() derives
    # the tool description from it by default, so it is kept verbatim.
    #
    # Args:    query -- free-text search string.
    # Returns: str() of the raw response returned by the Tavily search call.
    # Raises:  RuntimeError if the TAVILY_API_KEY environment variable is unset.
    #
    # The key is read from the environment rather than hard-coded; any key
    # previously embedded in this notebook should be revoked.
    api_key = os.environ.get("TAVILY_API_KEY")
    if not api_key:
        raise RuntimeError("TAVILY_API_KEY environment variable is not set")
    client = AsyncTavilyClient(api_key=api_key)
    return str(await client.search(query))

In [16]:
async def web_search(query: str) -> str:
    """Given the City, Locality and Property, search on the web for nearby hospitals."""
    # The docstring above is LLM-facing: FunctionTool.from_defaults() derives
    # the tool description from it by default, so it is kept verbatim.
    #
    # Args:    query -- free-text search string.
    # Returns: str() of the raw response returned by the Tavily search call.
    # Raises:  RuntimeError if the TAVILY_API_KEY environment variable is unset.
    #
    # The key is read from the environment rather than hard-coded; any key
    # previously embedded in this notebook should be revoked.
    api_key = os.environ.get("TAVILY_API_KEY")
    if not api_key:
        raise RuntimeError("TAVILY_API_KEY environment variable is not set")
    client = AsyncTavilyClient(api_key=api_key)
    return str(await client.search(query))

In [17]:
from llama_index.core.tools import FunctionTool

# 8.0 Wrap each async web-search helper as an agent tool, in the same order as
#     the names on the left-hand side.
social_tool, review_tool, nearby_tool, web_search_tool = (
    FunctionTool.from_defaults(fn=search_fn)
    for search_fn in (
        social_life_on_web,
        review_on_web,
        nearby_area_on_web,
        web_search,
    )
)

In [18]:
# 9.0 Agent built from the tools defined above.

from llama_index.core.agent import AgentRunner, FunctionCallingAgentWorker

# 9.1 Worker: lets the LLM decide which tool to call for each request.
#     NOTE(review): `web_search_tool` is created earlier in the notebook but is
#     not registered here -- confirm whether that is intentional.
agent_worker = FunctionCallingAgentWorker.from_tools(
    [read_tool, social_tool, review_tool, nearby_tool],
    llm=llm,
    verbose=True,   # try also False
)

# 9.2 Supervisor: runner that drives the worker.
agent = AgentRunner(agent_worker)



In [25]:
# 10.0 Ask the agent for properties from the indexed CSV.
#      NOTE: on the recorded run this call failed with a Groq BadRequestError
#      (code `tool_use_failed`), and the rejected tool call asked about
#      "Whitefield" although the prompt says "Brookfield".
response = agent.chat("Give list of 2BHK properties in Brookfield Bangalore from the data in the database")
print(response)

Added user message to memory: Give list of 2BHK properties in Brookfield Bangalore from the data in the database


BadRequestError: Error code: 400 - {'error': {'message': "Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '<tool_call>\n{"name": "query_engine_tool", "arguments": "{\\"input\\": \\"area: Whitefield, city: Bangalore, property_type: 2BHK\\"}"}\n</tool_call>'}}

In [22]:
# 10.1 Follow-up question; it relies on the agent's chat memory for "the area
#      just mentioned".
#      NOTE: on the recorded run this call also failed with `tool_use_failed`.
response = agent.chat("for the area just mentioned, give me a list of nearby hospitals using web search")
print(response)

Added user message to memory: for the area just mentioned, give me a list of nearby hospitals using web search


BadRequestError: Error code: 400 - {'error': {'message': "Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '<tool_call>\n{"name": "nearby_area_on_web", "arguments": "{\\"query\\": \\"Whitefield Bangalore hospitals nearby\\"}"}\n</tool_call>'}}

In [21]:
# 10.2 Follow-up question about restaurants; on the recorded run the agent
#      answered it by calling `nearby_area_on_web`.
response = agent.chat("now, for the area menntioned, give me a list of nearby restaurants using web search")
print(response)

Added user message to memory: now, for the area menntioned, give me a list of nearby restaurants using web search
=== Calling Function ===
Calling function: nearby_area_on_web with args: {"query": "Whitefield Bangalore restaurants nearby"}
=== Function Output ===
{'query': 'Whitefield Bangalore restaurants nearby', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'Top 10 Restaurants Near Whitefield You Must Try | FF21', 'url': 'https://www.ff21.in/blog/top-10-restaurants-near-whitefield-you-must-try', 'content': "So, if you're after the hyper-local Karnataka cuisine, Oota is one of the few restaurants near Whitefield Bangalore to offer it in all its glory. Location: 7th Floor, 331, Road 5B, EPIP Zone, Whitefield, Bangalore, Karnataka, 560066. Timings: Breakfast (Saturday & Sunday): 08:00 AM-11:00 AM", 'score': 0.917251, 'raw_content': None}, {'title': 'The 20 Best Whitefield Restaurants In Bangalore', 'url': 'https://www.crazymasalafood.com/the-20-best-w