In [1]:
from llama3.utils import get_llama3, LlamaIndexCustomLLM
from llama3.config import DATA_DIR, MODELS_DIR
import os

# "mhop" model: Llama 3 loaded with the `_ft_mhop` adapter, then wrapped for LlamaIndex.
mhop_adapter_dir = os.path.join(MODELS_DIR, "Meta-Llama-3-8B-Instruct_ft_mhop")
llm_mhop, tokn_mhop = get_llama3(adapter_path=mhop_adapter_dir)
cust_llm_mhop = LlamaIndexCustomLLM(llm_mhop, tokn_mhop, num_output=1000)

# "raft" model: same procedure with the `_ft_raft` adapter.
raft_adapter_dir = os.path.join(MODELS_DIR, "Meta-Llama-3-8B-Instruct_ft_raft")
llm_raft, tokn_raft = get_llama3(adapter_path=raft_adapter_dir)
cust_llm_raft = LlamaIndexCustomLLM(llm_raft, tokn_raft, num_output=1000)




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [34]:
from llama_index.core.agent import ReActAgent

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

from llama_index.core.tools import QueryEngineTool, ToolMetadata

In [3]:
# Try to reuse the rice index persisted under DATA_DIR/rag_storage/rice.
# `index_loaded` records whether that worked so a later cell can decide
# whether the index has to be built from the source PDF instead.
try:
    storage_context = StorageContext.from_defaults(
        persist_dir=os.path.join(DATA_DIR, "rag_storage","rice")
    )
    rice_index = load_index_from_storage(storage_context)
except Exception as load_error:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt/SystemExit
    # still propagate, and the reason for the failure is shown, not hidden.
    print(f"Could not load persisted rice index: {load_error!r}")
    index_loaded = False
else:
    index_loaded = True

In [4]:
index_loaded

True

In [5]:
# Fallback path: only runs when no persisted index could be loaded.
if not index_loaded:
    # Read the source PDF into documents.
    rice_pdf_path = os.path.join(DATA_DIR, "nsdl_books", "RICE-FORMATTED.pdf")
    rice_reader = SimpleDirectoryReader(input_files=[rice_pdf_path])
    rice_docs = rice_reader.load_data()

    # Build the vector index over those documents.
    rice_index = VectorStoreIndex.from_documents(rice_docs)

    # Persist to the same directory the load attempt reads from.
    rice_persist_dir = os.path.join(DATA_DIR, "rag_storage", "rice")
    rice_index.storage_context.persist(persist_dir=rice_persist_dir)


In [6]:
# Retriever over the rice index, created with default settings (no arguments passed).
rice_ret = rice_index.as_retriever()

In [7]:
# Smoke-test the retriever with a sample question; the cell displays the returned NodeWithScore list.
rice_ret.retrieve("What is botanical name of rice")

[NodeWithScore(node=TextNode(id_='99699a5a-ffcd-4942-9dc6-cadaf017d7a7', embedding=None, metadata={'page_label': '22', 'file_name': 'rice.pdf', 'file_path': 'data/rice.pdf', 'file_type': 'application/pdf', 'file_size': 403959, 'creation_date': '2024-05-29', 'last_modified_date': '2024-05-29'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='a8a74414-3bbd-431f-808e-c1f553d2a992', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '22', 'file_name': 'rice.pdf', 'file_path': 'data/rice.pdf', 'file_type': 'application/pdf', 'file_size': 403959, 'creation_date': '2024-05-29', 'last_modified_date': '2024-05-29'}, hash='2149e387408fd66bccbd1b715d6925237a974a69c98e7ff17d647b983125aff1')}, text='Source: w

In [8]:
# Query engine over the rice index; answer generation is delegated to the
# RAFT-adapter model (cust_llm_raft), using similarity_top_k=3 and the
# 'compact' response mode.
query_engine = rice_index.as_query_engine(
    llm=cust_llm_raft,
    similarity_top_k=3,
    response_mode='compact',
)

In [9]:
from llama_index.core import PromptTemplate

# Few-shot QA prompt for the query engine. `{context_str}` and `{query_str}`
# are the template placeholders; the worked example shows the expected answer
# style, terminated by the <|eot_id|> token.
# The example context contains literal backslash-slash pairs (apparently left
# over from JSON-escaped source text). They are written as `\\/` so the prompt
# text is unchanged while avoiding Python's invalid-escape-sequence warning.
query_engine_qa_str = """
Given the context information answer the query. Answer precisely and concisely using given facts. Use your knowledge to answer if context does not provide anwer directly.

Example: 
Context: 
Table 1. Rice, acrea ge, production a nd productivity in major coun tries  \nContinent\\/C ountry Area (m ha ) Production (m t) Yield (t\\/ha) \nAsia 136.56 532.98 3.90 \nChina 29.30 185.50 6.33 \nIndia 43.00 129.00 3.00 \nIndonesia 11.80 54.00 4.58 \nBangla desh 11.00 40.10 3.64 \nThaila nd 10.20 27.00 2.65 \nVietnam 7.34 36.30 4.95 \nMyanmar 6.27 24.50 3.91 \nPhilippi nes 4.12 14.80 3.60 \nPakistan 2.40 6.90 2.88 \nBrazil* 3.73 13.28 3.62 \nJapan* 1.70 10.91 6.42 \nUSA* 1.35 10.47 7.78 \nEgypt* 0.65 6.35 9.84 \nProduction values are for brown (unhusked) rice \nSource : FAO Quarterly Bu lletin  of Statistics, 20 05, Vol. 12(3\\/3):18-19 an d * FAO Produ ction Year  \nBook, 200 4. 2 \nIn India, rice is the m ost im portant a nd extensively grown food crop, occupying about 43 \nmillion ha o f land. It is grown in almost all the states, with  Uttar Pradesh, W est Bengal and  \nPunjab leading in area, production and productivity, respectively (Table 2).
---------------------
Query: "What is the yield of rice per hectare in India?"
Answer: The yield of rice per hectare in India is 3.00 tonnes. <|eot_id|>

Context:
{context_str}
---------------------
Query: {query_str}
Answer: 
"""
# Wrap the raw prompt string in a PromptTemplate so it can be handed to query_engine.update_prompts.
query_engine_qa_prompt = PromptTemplate(query_engine_qa_str)

  query_engine_qa_str = """


In [10]:
# Replace the query engine's text-QA template with the custom few-shot prompt.
query_engine.update_prompts({'response_synthesizer:text_qa_template': query_engine_qa_prompt})

In [11]:
# print(query_engine.get_prompts()['response_synthesizer:text_qa_template'].get_template())

In [12]:
# query_engine.query("what is bonatnical name of rice")

In [13]:
# Expose the rice query engine to the agent as a tool named "query_engine".
# The description is what the agent reads when deciding how to call the tool.
query_engine_tool = QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name="query_engine",
            description=(
                # Adjacent literals are concatenated verbatim, so the first
                # sentence needs its own terminating ". " separator.
                "Provides detailed information about rice. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    )

In [14]:
from llama_index.core import PromptTemplate

# System prompt for the ReAct agent. `{tool_desc}` and `{tool_names}` are
# template placeholders; doubled braces (`{{ ... }}`) are literal braces in the
# rendered prompt. The worked example demonstrates the Thought / Action /
# Action Input / Observation / Answer sequence the agent is expected to emit,
# so its labels must be spelled exactly as the format section states them.
react_system_header_str = """\
You are a helpful assistant.
You have access to the following tools:
{tool_desc}
You are responsible for using
the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools
to complete each subtask.

Please ALWAYS start with a Thought.
Please use a valid JSON format for the Action Input.

To answer the question, please use the following format.
Thought: I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs

Example:
What is the botanical name of staple food of 2/3 of the worlds population.
Thought: To answer the query I need to know the staple food of 2/3 popualtion of the world.
Action: query_engine
Action Input: {{'input': 'What is the staple food of about 2/3 of the world's population?'}} <|eot_id|>
Observation: Rice is staple food of about 2/3 population of world.
Thought: Now that I know rice is  the staple food of 2/3 popualtion of the world, I need botanical name of Rice
Action: query_engine
Action Input: {{'input': 'What is the botanical name of rice?'}} <|eot_id|>
Observation: Oryza sativa L
Thought: I can answer without any more tools!
Answer: Botanical name of staple food of 2/3 of the worlds population is Oryza sativa L <|eot_id|>

Now it's your turn
"""
# Wrap the ReAct header string in a PromptTemplate so it can be installed via agent.update_prompts.
react_system_prompt = PromptTemplate(react_system_header_str)

In [15]:
# ReAct agent driven by the mhop model, with the rice query engine as its only tool.
# verbose=True makes the agent print its Thought/Action/Observation steps as it runs.
agent = ReActAgent.from_tools([query_engine_tool], llm=cust_llm_mhop, verbose=True)

In [16]:
# Override the agent's system prompt with the custom ReAct header defined in this notebook.
agent.update_prompts({"agent_worker:system_prompt": react_system_prompt})

In [32]:
# Question posed to the agent in the chat cell that follows.
query = "What type of intercrops I can grow in between rice ?"

In [18]:
# prompt = react_system_header_str + query
# tool_desc = (
#             f"> Tool Name: {query_engine_tool.metadata.name}\n"
#             f"Tool Description: {query_engine_tool.metadata.description}\n"
#             f"Tool Args: {query_engine_tool.metadata.fn_schema_str}\n"
#         )
# tool_names = [query_engine_tool.metadata.name]
# prompt = prompt.format(tool_desc=tool_desc, tool_names=tool_names)

In [19]:
# print(prompt)

In [20]:
# cust_llm.complete(prompt+"\nassistant:\n")

In [33]:
# Reset the agent before asking, so this run does not continue from an earlier
# chat (presumably clears the agent's conversation state; confirm against the
# LlamaIndex docs), then run the agent on `query` and keep its reply.
agent.reset()
response = agent.chat(query)

[1;3;38;5;200mThought: To answer the question, I need to know what types of intercrops can be grown in between rice.
Action: query_engine
Action Input: {'input': 'What types of intercrops can be grown in between rice?'}
[0m[1;3;34mObservation: Unfortunately, the context does not provide a direct answer to this query. However, based on general knowledge, it is known that rice is often intercropped with other crops to enhance crop yields and reduce pests and diseases. Some common intercrops grown in between rice include legumes like cowpeas, mung beans, and soybeans, as well as other crops like maize, sorghum, and vegetables. The choice of intercrop depends on factors like climate, soil type, and market demand.
[0m[1;3;38;5;200mThought: Now that I know some common intercrops that can be grown in between rice, I need to know more about the benefits of intercropping with rice.
Action: query_engine
Action Input: {'input': 'What are the benefits of intercropping with rice?'}
[0m[1;3;3

In [None]:
# agent.reset()
# response = agent.chat(query)

In [24]:
# Echo the question currently held in `query`.
# TODO(review): the saved output of this cell shows a different question (tomato /
# alternative crops) than the one assigned to `query` earlier in the notebook;
# the cells were run out of order. Restart and run all to refresh the outputs.
print(query)

Since tomato prices are on the lower side I have farm land in  karnataka, maharashtra, odhisa and assam, please recommend alternative crops best suited for me


mhop + raft 
query: Since tomato prices are on the lower side I have farm land in  karnataka, maharashtra, odhisa and assam, please recommend alternative crops best suited for me
Thought: To recommend alternative crops, I need to know the climate and soil type suitable for those crops.
Action: query_engine
Action Input: {'input': 'What are the common crops that can be grown in Karnataka, Maharashtra, Odisha, and Assam?'}
Observation: Rice is the most important and extensively grown food crop in India, occupying about 43 million ha of land. It is grown in almost all the states, with Uttar Pradesh, West Bengal and Punjab leading in area, production and productivity, respectively (Table 2). 
The common crops that can be grown in Karnataka, Maharashtra, Odisha, and Assam are:
1. Rice - grown in Karnataka, Maharashtra, Odisha, and Assam.
2. Wheat - grown in Karnataka, Maharashtra, and Assam.
3. Jute - grown in Assam and Odisha.
4. Sugarcane - grown in Karnataka, Maharashtra, and Odisha.
5. Cotton - grown in Karnataka, Maharashtra, and Assam.
6. Tobacco - grown in Karnataka, Maharashtra, and Assam.
7. Coffee - grown in Karnataka.
8. Tea - grown in Assam.
9. Rubber - grown in Assam.
10. Cashew - grown in Karnataka and Odisha.
These crops are commonly grown in these states due to the suitable climate, soil, and water availability.
Thought: Since tomato prices are on the lower side, I need to recommend alternative crops that are less dependent on tomato prices.
Action: query_engine
Action Input: {'input': 'What are the crops that have a stable demand and less dependent on tomato prices?'}
Observation: There is no information in the given context about crops that have a stable demand and less dependent on tomato prices. The context is about rice and does not provide any information about other crops or their demand.
Thought: I need to analyze the climate and soil type of the regions to recommend alternative crops.
Action: query_engine
Action Input: {'input': 'What are the crops that are suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam?'}
Observation: According to the context, the crops suitable for the regions are:
- Karnataka: Jeerakasala, Gandhkasala, Sindigi (local)
- Maharashtra: Ambemoha 102, Ambemoha 147, Ambemoha 159, Krishnasal
- Odisha: Badshah Pasand, Govindabhog, Kataribhog, Sitabhog
- Assam: Not mentioned specifically, but it is mentioned that rice is grown in Assam.
Thought: Based on the observation, I can recommend alternative crops that are less dependent on tomato prices and suitable for the regions.
Action: query_engine
Action Input: {'input': 'What are the crops that are less dependent on tomato prices and suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam?'}
Observation: From the given context, it can be inferred that rice is the crop that is less dependent on tomato prices and suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam. The context mentions rice varieties suitable for these regions, such as Jeerakasala, Gopalbhog, Ambemoha, and others.


!!!!!! No match
Observation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
Thought: I need to recommend alternative crops that are less dependent on tomato prices and suitable for the regions.
Action: query_engine
Action Input: {'input': 'What are the crops that are less dependent on tomato prices and suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam?'}
Observation: Crops like rice, wheat, and sugarcane ae less dependent on tomato prices and can be suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam.
Thought: I will recommend rice, wheat, and sugarcane as alternative crops for the regions of Karnataka, Maharashtra, Odisha, and Assam.
Answer: Rice, wheat, and sugarcane are recommended as alternative crops that are less dependent on tomato prices and suitable for the regions of Karnataka, Maharashtra, Odisha, and Assam.

In [None]:
## base llama3 output
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Thought: I need to use a tool to help me answer the question.
# Action: query_engine
# Action Input: {'input': 'What is the croping season of main type of wheat grown in the major wheat producing zone of India?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: The cropping season of the main type of wheat grown in the major wheat producing zone of India is Rabi, which typically occurs from 
# March to June.
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
# Thought: I need to use a tool to help me answer the question.
# Action: query_engine
# Action Input: {'input': 'What is the cropping season of wheat in India?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: The cropping season of wheat in India is known as the Rabi season, which typically occurs from March to June.
# Thought: (Implicit) I can answer without any more tools!
# Answer: 

# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Thought: I need to know the croping season of main type of wheat grown in the major wheat producing zone of India.
# Action: query_engine
# Action Input: {'input': 'What is the croping season of main type of wheat grown in the major wheat producing zone of India?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: The cropping season of the main type of wheat grown in the major wheat producing zone of India is Rabi, which typically occurs from 
# March to June.
# Thought: (Implicit) I can answer without any more tools!
# Answer: 

# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Thought: I need to use a tool to help me answer the question.
# Action: query_engine
# Action Input: {'input': 'What is the croping season of main type of wheat grown in the major wheat producing zone of India?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: The cropping season of the main type of wheat grown in the major wheat producing zone of India is Rabi, which typically occurs from March 
# to June.
# Thought: Now that I know the cropping season of the main type of wheat grown in the major wheat producing zone of India, I need to know the botanical 
# name of wheat.
# Action: query_engine
# Action Input: {'input': 'What is the botanical name of wheat?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: Triticum aestivum
# Thought: I can answer without any more tools!
# Answer: The croping season of main type of wheat grown in the major wheat producing zone of India is Rabi, which typically occurs from March to June, 
# and the botanical name of wheat is Triticum aestivum.

In [None]:
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Thought: To answer the query I need to know the main type of wheat grown in the major wheat producing zone of India.
# Action: query_engine
# Action Input: {'input': 'What is the main type of wheat grown in the major wheat producing zone of India?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: T 
# Thought: Now that I know the main type of wheat grown in the major wheat producing zone of India, I need to know the cropping season of
# that type of wheat.
# Action: query_engine
# Action Input: {'input': 'What is the cropping season of high-yielding and disease-resistant varieties of wheat?'}
# Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
# Observation: The cropping season of high-yielding and disease-resistant varieties of wheat is typically during the Rabi season, which 
# generally ranges from March to June in various states of India.
# Thought: I can answer without any more tools!
# Answer: The cropping season of main type of wheat grown in the major wheat producing zone of India is typically during the Rabi season, 
# which generally ranges from March to June in various states of India.