In [15]:
import torch
import transformers
from transformers import AutoTokenizer
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
import warnings
# Ignore every warning raised through the `warnings` module from here on.
# NOTE(review): this also hides deprecation notices; consider narrowing the filter.
warnings.filterwarnings("ignore") 

In [2]:
# Directory that holds the source documents to index.
# NOTE(review): absolute, machine-specific path; adjust it before running elsewhere.
data_path = r"/home/voldemort/data_science/data/rag/docs/economy"

def load_docs(data_path):
    """Read the documents stored under a directory.

    Args:
        data_path: Directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(data_path).load()`` produces.
    """
    return DirectoryLoader(data_path).load()

# Load the documents and display how many were read.
documents = load_docs(data_path)
len(documents)

2

In [3]:
def split_docs(documents, chunk_size=512, chunk_overlap=0):
    """Break documents into smaller chunks.

    Args:
        documents: Documents to pass to the splitter's ``split_documents``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of ``split_documents`` on the configured splitter.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Split with the defaults (chunk_size=512, chunk_overlap=0) and display the chunk count.
docs = split_docs(documents)
len(docs)

6

In [64]:
# Inspect the text of the first chunk as a sanity check on the split.
print(docs[0].page_content)

Key Economic Indicators of India (As of September 2024)

Note: These figures are estimates and subject to change. Please refer to official government sources for the most accurate and up-to-date data.

Macroeconomic Indicators

GDP: Approximately $3.95 trillion (nominal) and $14.59 trillion (PPP)

GDP Growth Rate: Around 7% (estimated for FY2024)



Inflation: Around 5-6% (estimated)

Unemployment Rate: Approximately 6.7% (estimated)

Sectoral Composition

Agriculture: Approximately 15% of GDP


In [5]:
# Embedding model used to vectorize the chunks.
# NOTE(review): loading this checkpoint makes sentence-transformers report that no
# sentence-transformers model was found and that it creates one with MEAN pooling;
# confirm that is intended, or switch to a dedicated sentence-embedding checkpoint.
llm_model_embed_name = "google/flan-t5-large"
embeddings = SentenceTransformerEmbeddings(model_name=llm_model_embed_name)

No sentence-transformers model found with name google/flan-t5-large. Creating a new one with MEAN pooling.


In [6]:
# Embed the chunks and store them in the Chroma vector store queried during retrieval.
index  = Chroma.from_documents(docs, embeddings)

In [11]:
# Checkpoint to load and the local directory used to cache its files.
llm_model_name = "meta-llama/Llama-2-7b-chat-hf"
cache_dir = "/home/voldemort/data_science/llm_models"
# Use the currently selected CUDA device when one is available, otherwise the CPU.
device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Resolve the config through the same cache directory as the weights and the
# tokenizer, so every file of the checkpoint lives under `cache_dir`.
# NOTE(review): max_new_tokens is a generation setting — confirm the model
# config actually honours it when passed here.
model_config = transformers.AutoConfig.from_pretrained(
    llm_model_name,
    max_new_tokens=1024,
    cache_dir=cache_dir
)

# Load the causal LM with the quantization settings above, placed on `device`.
model = transformers.AutoModelForCausalLM.from_pretrained(
    llm_model_name,
    config=model_config,
    quantization_config=bnb_config,
    device_map=device,
    cache_dir=cache_dir
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
# Load the tokenizer for the same checkpoint, from the same cache directory as the model.
tokenizer = AutoTokenizer.from_pretrained(llm_model_name,cache_dir=cache_dir)
# Reuse the end-of-sequence token for padding
# (presumably the tokenizer has no pad token of its own — TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

In [17]:
# Text-generation pipeline over the loaded model and tokenizer.
# NOTE(review): max_length presumably bounds prompt plus generated tokens —
# confirm it leaves room for the retrieved context.
pipe  = transformers.pipeline("text-generation",model=model,tokenizer=tokenizer,max_length=1024,temperature=0.1)

In [18]:
# Wrap the Hugging Face pipeline so it can be passed to the LangChain QA chain.
llm = HuggingFacePipeline(pipeline=pipe)

In [19]:
# Question-answering chain over `llm`; chain_type="stuff" presumably places all
# supplied documents into a single prompt — TODO confirm.
chain = load_qa_chain(llm, chain_type="stuff")

In [20]:
def get_similiar_docs(query, k=5, score=False):
    """Look up the entries of ``index`` most similar to ``query``.

    Args:
        query: Text to search for.
        k: Forwarded to the search call as ``k``.
        score: When truthy, use ``similarity_search_with_score``;
            otherwise use ``similarity_search``.

    Returns:
        The result of the selected search method on ``index``.
    """
    search = index.similarity_search_with_score if score else index.similarity_search
    return search(query, k=k)
    
def get_answer(query):
    """Answer ``query`` from the documents retrieved for it.

    Retrieves documents with ``get_similiar_docs(query)`` and passes them,
    together with the question, to ``chain.run``.

    Args:
        query: The question to answer.

    Returns:
        Whatever ``chain.run`` returns for the retrieved documents and question.
    """
    chain_inputs = {
        "input_documents": get_similiar_docs(query),
        "question": query,
    }
    return chain.run(**chain_inputs)

In [47]:
# Ask about the overall size of the economy; `answer` holds whatever the chain returned.
query = "How much is India's Economy ?"
answer = get_answer(query)

In [50]:
# Print the segment after the first "Helpful Answer:" marker, truncated at
# "Unhelpful Answer:" when that appears.
# NOTE(review): indexing [1] raises IndexError if "Helpful Answer:" is missing from `answer`.
print(answer.split("Helpful Answer:")[1].split("Unhelpful Answer:")[0])

 According to the given information, India's economy is approximately $3.95 trillion (nominal) and $14.59 trillion (PPP) in terms of GDP.




In [51]:
# Ask about the sectoral breakdown; `answer` is overwritten with the new chain output.
query = "What are the contributions of various sectors to Indian Economy ?"
answer = get_answer(query)

In [52]:
# Print the segment after the first "Helpful Answer:" marker, truncated at
# "Unhelpful Answer:" when that appears.
# NOTE(review): indexing [1] raises IndexError if "Helpful Answer:" is missing from `answer`.
print(answer.split("Helpful Answer:")[1].split("Unhelpful Answer:")[0])

 According to the given information, the major sectors contributing to the Indian economy are:

Agriculture: Approximately 15% of GDP

Industry: Approximately 21% of GDP

Services: Approximately 77.8% of GDP

Therefore, the major contributors to the Indian economy are agriculture, industry, and services. However, please note that these figures are estimates and subject to change, and the Indian economy is a complex and dynamic system. For more in-depth analysis, I recommend consulting official government sources like the Reserve Bank of India (RBI), the Ministry of Statistics and Program Implementation (MoSPI), and the World Bank.


In [62]:
# Ask for a comparison with the USA; `answer` is overwritten with the new chain output.
# NOTE(review): check whether the indexed documents contain a US GDP figure;
# otherwise the ratio relies on the model's own knowledge.
query = "What is the ratio of India's GDP to USA's GDP?"
answer = get_answer(query)

In [63]:
# Print the segment after the first "Helpful Answer:" marker, truncated at
# "Unhelpful Answer:" when that appears.
# NOTE(review): indexing [1] raises IndexError if "Helpful Answer:" is missing from `answer`.
print(answer.split("Helpful Answer:")[1].split("Unhelpful Answer:")[0])

 According to the given data, India's GDP (nominal) is approximately $3.95 trillion, while USA's GDP (nominal) is approximately $26.3 trillion. Therefore, India's GDP is approximately 15% of USA's GDP.


