# Step 1 : Creating an Index

In [47]:
from langchain_community.document_loaders import UnstructuredURLLoader

links = ['https://cleartax.in/s/budget-2024-highlights',
        'https://www.hindustantimes.com/budget',
        'https://www.livemint.com/economy/budget-2024-key-highlights-live-updates-nirmala-sitharaman-infrastructure-defence-income-tax-modi-budget-23-july-11721654502862.html',
        'https://economictimes.indiatimes.com/news/economy/policy/budget-2024-highlights-india-nirmala-sitharaman-capex-fiscal-deficit-tax-slab-key-announcement-in-union-budget-2024-25/articleshow/111942707.cms?from=mdr']

loader = UnstructuredURLLoader(urls = links, show_progress = True)

In [50]:
data = loader.load()

In [51]:
print(len(data))
print(type(data))
print(type(data[0]))

4
<class 'list'>
<class 'langchain_core.documents.base.Document'>


In [52]:
print(data[0])

page_content='Don't Wait! File Before July 31st and Avoid Penalties!

Start now and Finish filing in no time.

File Now

AI Summary to Minimize your effort

Index

Index

Budget 2024 Highlights: PDF Download, Key Takeaways, Important Points

By Mohammed S Chokhawala

Updated on: Jul 27th, 2024

17 min read

Budget Day is an eagerly anticipated event in India, with both businesses and the general public waiting with bated breath to understand the schemes and initiatives that could potentially benefit them. This year, due to the elections, Budget 2024 has been announced later than usual i.e. February 1.

The FM announced the Final Budget on 23rd July 2024. She stated that this year’s budget is focused on employment, skilling, MSME and the middle class. She also stated the priorities of the budget which were as follows:

Productivity and Resilience in Agriculture

Employment and Skilling

Inclusive Human Resource Development and Social Justice

Manufacturing and Services

Urban Developmen

# Step 2. chunking the data

In [53]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    separators = ["\n\n", "\n", " ", ""],
    chunk_size = 1000, 
    chunk_overlap = 50)

In [54]:
chunks = text_splitter.split_documents(data)

# Step 3. Importing the Embeddings model

In [55]:
key = ""

with open("key.txt", "r") as file:
    key = file.read()

In [56]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

model_name = "models/embedding-001"

Embeddings = GoogleGenerativeAIEmbeddings(
    model = model_name,
    google_api_key = key
)

# Step 4. Connection with ChromaDB

In [57]:
from langchain_chroma import Chroma

In [58]:
db = Chroma(
    collection_name = "budget_database",
    embedding_function = Embeddings,
    persist_directory = "./chroma_db_"
)

In [59]:
 db.add_documents(chunks)

['bc5477ab-85e3-4af7-8786-c93a093726bc',
 'd93138ac-b9a7-4cf9-b88d-708fc26841e7',
 '44253198-8915-467b-9225-95ea68bd13b6',
 '60cc9bab-c8a1-40cc-b708-e176c75dfebc',
 '985edebe-fed7-4c8c-9c7e-54b14f964c0b',
 'ab4b1f57-b502-4a51-a732-5ce77adabb80',
 'facb0b55-b037-4f11-b328-bc2b5e97ff69',
 '3b791355-54e3-4b27-88b9-0d644debc3dd',
 '27aa2e30-4f2b-4210-ac50-09b5244d634f',
 '258d8c3f-e4b7-4bb9-a3f9-f054233fa6b5',
 '1b850195-1b2d-4972-a058-cb9ad0510b6d',
 '7865d7a2-f49c-4460-8764-c03035da4d89',
 '210c911d-95ec-4be0-a8a4-6535f222fc7c',
 '605004bf-e58f-4539-857a-644820b1f074',
 '27101073-8e94-438f-9195-ea29030772c9',
 'c0498737-7180-4a5c-b0fc-d731cec5e359',
 '84d13b05-9212-4343-bb90-165d3ce76e11',
 '7a198d67-a2ad-4051-85fb-ef67d3c5f8f3',
 'e5d9f7d1-e321-4ae1-b2b6-df622bb8e815',
 '86770128-df37-4e84-97fa-be0ea19213df',
 '4690e926-dd25-41da-a08a-14a075aeb6d7',
 '8ea8c9a8-6c4a-4375-9cad-2ef481f2c7d5',
 '723315f1-16a9-4158-ac9d-737fdff4ce7c',
 '90a2de44-0aba-41c4-a6e8-4b2d8493aced',
 '1211ebab-e20c-

# Step 5. Creating a Retreiver Object

In [60]:
retriever = db.as_retriever(search_type = "similarity", search_kwargs = {"k" : 3})

# Step 6. Creating a Chat Prompt Template 

In [61]:
from langchain_core.prompts import ChatPromptTemplate

PROMPT_TEMPLATE = """
Analyze the budget based only on the following financial data:
{financial_data}

Perform a budget analysis addressing this question: {question}

Provide a detailed analysis.
Focus solely on the information given in the financial data.
Do not include external information or assumptions.
Present your analysis directly without qualifying phrases.
"""

prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# Step 7. Creating a Chat Model

In [62]:
from langchain_google_genai import ChatGoogleGenerativeAI

chat_model = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                             google_api_key = key)

# Step 8. Creating an Output Parser

In [63]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

# Step 9. Creating a RAG chain

In [66]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return " ".join(doc.page_content for doc in docs)

rag_chain = {"financial_data" : retriever | format_docs , "question" : RunnablePassthrough()} | prompt_template | chat_model | output_parser

# Step 10 . Invoking the RAG chain

In [68]:
question = "2024 Budget Highlights"
response = rag_chain.invoke(question)

print(response)

The 2024 Budget highlights several key areas of focus:

* **Employment and Skilling:** The budget emphasizes initiatives aimed at creating job opportunities and enhancing skills development. 
* **MSME Support:** The budget includes provisions for financial assistance and support for the Micro, Small, and Medium Enterprises (MSME) sector.
* **Middle Class Focus:** The budget aims to benefit the middle class with various schemes and initiatives.
* **Agriculture:** The budget prioritizes measures to improve agricultural productivity and resilience. 
* **Infrastructure Development:** The budget allocates resources for infrastructure development.
* **Fiscal Deficit:** The budget projects a reduction in the fiscal deficit to 4.9% in FY2025 with a commitment to further reducing it to 4.5%.

The budget also mentions a new tax slab structure and focuses on inclusive human resource development, social justice, manufacturing, services, and urban development. 



In [69]:
question = "2024 Budget Allocation for Andhra Pradesh"
response = rag_chain.invoke(question)

print(response)

## 2024 Budget Allocation for Andhra Pradesh

* **Amaravati Development:** Rs. 15,000 crore financial assistance in the current year, with additional amounts promised for future years.
* **Polavaram Irrigation Project:**  Full financing and early completion.
* **Essential Infrastructure:** Funding for water, power, railways, and roads as per the AP Reorganisation Act.
* **Backward Region Grants:** Funding for backward regions of Rayalaseema, Prakasam, and North coastal Andhra Pradesh as per the AP Reorganisation Act. 
* **Vizag-Chennai Industrial Corridor:**  Package announced for the Kopparthy node.
* **Hyderabad-Bengaluru Industrial Corridor:** Funding for essential infrastructure in the Orvakal node.
* **PM Awas Yojana:**  Three crore additional houses in rural and urban areas.
* **Women-led Development:** Allocation of over Rs. 3 lakh crore to promote women-led development and benefit women and girls. 

