In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA



In [2]:
## Read the PDFs from the folder
loader = PyPDFDirectoryLoader("./economicSurvey")
documents = loader.load()

# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_documents = text_splitter.split_documents(documents)

# Display the first chunk as a quick sanity check.
final_documents[0]

Document(page_content='Economic\nSurvey 2022-23\nGovernment of  India\nMinistry of  Finance\nDepartment of  Economic Affairs\nEconomic Division\nNorth Block\nNew Delhi-110001\nJanuary, 2023', metadata={'source': 'economicSurvey\\Economic Survey 22-23.pdf', 'page': 0})

In [3]:
# Number of chunks produced by the text splitter.
len(final_documents)

2906

In [8]:
## Embedding using Hugging Face
from tqdm.autonotebook import tqdm, trange

# The embedding model runs on the CPU and is asked to normalize its output.
# Presumably an alternative model id: sentence-transformers/all-MiniLM-l6-v2
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

In [9]:
import numpy as np

# Embed the first chunk once and reuse the vector for both prints; calling
# embed_query twice would run the embedding model twice on the same text.
sample_embedding = np.array(
    huggingface_embeddings.embed_query(final_documents[0].page_content)
)
print(sample_embedding)
print(sample_embedding.shape)

[-1.65690295e-02 -3.99196185e-02 -1.05896322e-02 -2.40996405e-02
  5.45143895e-02  5.91869093e-02  2.27332693e-02  2.64223088e-02
 -2.88180243e-02  9.88948625e-04  5.38988262e-02  1.19647318e-02
 -1.73696643e-03  3.74568217e-02  1.65536404e-02 -4.17738892e-02
 -3.17145362e-02 -1.10766619e-01  2.72003040e-02  3.35501544e-02
  8.18391517e-02 -2.35597789e-03  1.70991942e-02 -9.98104289e-02
  2.07515247e-02  1.86326094e-02  2.26712283e-02 -7.87614807e-02
 -2.76551973e-02 -1.17993332e-01  6.24427125e-02 -6.86210319e-02
  2.94928458e-02 -3.00586317e-02  2.11289860e-02 -1.67274184e-03
  2.82015484e-02  9.09048170e-02  2.33434886e-02 -2.97657140e-02
 -3.56337465e-02  1.88697781e-02  9.72870388e-04 -4.37775925e-02
  5.55217080e-03 -3.33155543e-02 -2.12661456e-02  1.95751972e-02
 -3.18237878e-02 -3.61032486e-02  9.46649164e-03 -8.38591307e-02
 -8.96006916e-03  2.00300757e-02  3.03959101e-02 -2.49167401e-02
 -1.93350352e-02 -2.23947465e-02  1.54758170e-02  8.12176168e-02
  7.12768361e-02  1.87031

In [10]:
## VectorStore creation
# NOTE: only the first 120 chunks are embedded and indexed here, so later
# searches can only return text from those chunks.
indexed_documents = final_documents[:120]
vectorstore = FAISS.from_documents(indexed_documents, huggingface_embeddings)

In [12]:
## Query using similarity search
query = "India’s Electricity Consumption in Agriculture?"
relevant_documents = vectorstore.similarity_search(query)

# Print the text of the first document returned by the search.
first_hit = relevant_documents[0]
print(first_hit.page_content)

VIII.3 India’s Electricity Consumption in agriculture (Annual) 245
VIII.4 Sustained increase in Foodgrains Production in India (Million Tonnes) 245
VIII.5Minimum	Support	Price	for	selected	Kharif	and	Rabi	Crops	(₹/Quintal) 246
VIII.6Continued	Increase	in	Institutional	Credit	to	 Agriculture	Sector	(₹	lakh	Crores)	 247
VIII. 7 Though the Crop sector is still the major contributor to agriculture GV A, the livestock sector is catching up (in per cent) 252
VIII.8 Top ten states from Multi-State Cooperative Societies as on 20 October 2022 254
VIII.9 Allocation of Foodgrains under NFSA and Other Welfare Schemes in 2022-23 and as of	1	January	2023	(LMT)257
VIII.10 Total Food Subsidy released by the Government of India since 2014-15 (thousand crore)258
IX.1 Private Investment gathers momentum 263
IX.2PMI	Manufacturing	remains	in	expansionary	zone 266
IX.3 Sub-indices of IIP growing at a healthy pace (Apr-Nov) 266
IX.4Steady	Growth	in	Components	of	Index	of	Core	Industries 266


In [13]:
# Expose the vector store as a retriever configured for similarity search
# with k=3, then print its configuration.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001DB9E7AC440> search_kwargs={'k': 3}


In [61]:
import os
import warnings


def export_hf_token(environ=os.environ):
    """Copy the Hugging Face token from ``huggingface_api`` into the Hub variables.

    Parameters
    ----------
    environ : MutableMapping[str, str]
        Mapping to read from and write to. Defaults to the process environment.

    Returns
    -------
    bool
        True when a non-empty token was found and exported, False otherwise.

    Notes
    -----
    When ``huggingface_api`` is unset or empty nothing is written. Wrapping the
    lookup in ``str()`` would instead store the literal text ``"None"`` as the
    key, which looks like a token but can never authenticate.
    """
    token = environ.get("huggingface_api")
    if not token:
        warnings.warn(
            "Environment variable 'huggingface_api' is not set; "
            "no Hugging Face token was exported."
        )
        return False

    # Variable name used by the original notebook, kept for compatibility.
    environ['HUGGINGFACEHUB_API_KEY'] = token
    # Name documented by LangChain's Hugging Face integrations; setdefault
    # avoids overwriting a token that is already configured.
    environ.setdefault("HUGGINGFACEHUB_API_TOKEN", token)
    return True


hf_token_available = export_hf_token()

'The Hugging Face Hub is an platform with over 350k models, 75k datasets, and 150k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.

In [62]:
from langchain_community.llms import HuggingFaceEndpoint

# LLM accessed through HuggingFaceEndpoint, identified by its Hub repo id.
# NOTE(review): confirm the endpoint honours `max_length`; `max_new_tokens`
# may be the intended generation parameter.
hf = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"max_length": 500},
    temperature=0.1,
)

# Ask the model directly, without passing any retrieved context.
query = "what is India’s Electricity Consumption in Agriculture?"
hf.invoke(query)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\supra\.cache\huggingface\token
Login successful


'\n\nIndia’s electricity consumption in agriculture is a significant contributor to the country’s overall energy consumption. Agriculture is a major sector in India, accounting for around 17% of the country’s GDP and employing around 50% of the country’s workforce.\n\nElectricity is used in agriculture for a variety of purposes, including irrigation, pumping water, and operating machinery. The use of electricity in agriculture has increased significantly in recent years, as the country has seen a shift towards mechanized farming.\n\nAccording to data from the Central Electricity Authority (CEA), India’s electricity consumption in agriculture was around 100 billion units in 2020-21. This represents around 6% of the country’s total electricity consumption.\n\nThe use of electricity in agriculture has several benefits. It can help to increase crop yields, reduce the need for manual labor, and improve the efficiency of farming operations. However, it also has some drawbacks, including the 

In [71]:
# Hugging Face models can be run locally through the HuggingFacePipeline class.
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# This rebinds `hf`: from here on it refers to the pipeline-backed model
# rather than the HuggingFaceEndpoint instance created earlier.
generation_kwargs = {"temperature": 0.1, "max_new_tokens": 300}
hf = HuggingFacePipeline.from_model_id(
    model_id="Qwen/Qwen2-0.5B-Instruct",
    task="text-generation",
    pipeline_kwargs=generation_kwargs,
)
llm = hf

# `query` is not assigned in this cell; it keeps the value set by an
# earlier cell.
llm.invoke(query)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


"India’s Electricity Consumption in Agriculture: A Study of the Impact on Food Security and Economic Growth\n\n# India's Electricity Consumption in Agriculture: A Study of the Impact on Food Security and Economic Growth\n\n## Abstract\n\nThe aim of this study is to examine the impact of electricity consumption in agriculture on food security and economic growth. The study was conducted using a cross-sectional survey methodology, which involved collecting data from 100 farmers in the state of Haryana, India. The results showed that the electricity consumption in agriculture had a significant negative impact on food security and economic growth. Specifically, the study found that the higher the electricity consumption in agriculture, the lower the agricultural productivity and the lower the income per head. Furthermore, the study also revealed that the electricity consumption in agriculture has a direct correlation with the level of poverty in the rural areas.\n\n## Keywords\n\nElectrici

In [72]:
# Prompt text for the QA chain. It has two placeholders: {context} and
# {question}. The text is sent to the model verbatim, so its wording and
# whitespace are part of the chain's behaviour.
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [73]:
# Build the prompt object from the template string; its two input variables
# match the {context} and {question} placeholders.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [74]:
# Assemble the retrieval QA chain from the current `hf` model, the retriever
# and the custom prompt. Source documents are requested alongside the answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hf,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [75]:
# Question passed to the retrieval QA chain.
query = "India’s Electricity Consumption in Agriculture"

In [76]:
# Call the QA chain with our query and print the entry stored under the
# 'result' key of the returned mapping.
# NOTE: in the recorded output the printed text begins with the prompt
# itself (instructions plus retrieved context).
result = retrievalQA.invoke({"query": query})
print(result['result'])


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

VIII.3 India’s Electricity Consumption in agriculture (Annual) 245
VIII.4 Sustained increase in Foodgrains Production in India (Million Tonnes) 245
VIII.5Minimum	Support	Price	for	selected	Kharif	and	Rabi	Crops	(₹/Quintal) 246
VIII.6Continued	Increase	in	Institutional	Credit	to	 Agriculture	Sector	(₹	lakh	Crores)	 247
VIII. 7 Though the Crop sector is still the major contributor to agriculture GV A, the livestock sector is catching up (in per cent) 252
VIII.8 Top ten states from Multi-State Cooperative Societies as on 20 October 2022 254
VIII.9 Allocation of Foodgrains under NFSA and Other Welfare Schemes in 2022-23 and as of	1	January	2023	(LMT)257
VIII.10 Total Food Subsidy released by the Government of India since 2014-15 (thousand crore)258
IX.1 Private Investment gathers momentum 263
IX.2PMI	Manufacturing	remains	in	expansionary	zone 266
IX.3 Sub-indices of