In [1]:
#!pip install -U nltk
#!pip install langchain-community
#!pip install --upgrade langchain-community
#!pip install -U langchain-groq
#!pip install unstructured
#!pip install unstructured libmagic python-magic python-magic-bin
#!pip install sentence-transformers
#!pip install faiss-cpu

In [2]:
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): with no category/module argument this filter is global, so it
# also hides deprecation warnings raised by the libraries imported below --
# consider narrowing it once the notebook is stable.
warnings.filterwarnings("ignore") 

In [3]:
import os
import pickle
import nltk
nltk.download('averaged_perceptron_tagger_eng')
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredURLLoader
from langchain_groq import ChatGroq

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\SAI KIRITI\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [4]:
def set_groq_api_key():
    """Return the Groq API key to use for ChatGroq requests.

    The key is read from the ``GROQ_API_KEY`` environment variable so a real
    secret never has to be written into (and shared with) the notebook.
    When the variable is unset or empty, the original placeholder string is
    returned so existing callers keep receiving a ``str``; the placeholder is
    not a usable credential.

    Returns:
        str: The value of ``GROQ_API_KEY``, or ``"your groq api key here"``.
    """
    # ``or`` (rather than a ``get`` default) also covers an empty-string value.
    return os.environ.get("GROQ_API_KEY") or "your groq api key here"

def load_documents(urls, chunk_size=500, chunk_overlap=20):
    """Load the pages at ``urls`` and split them into overlapping chunks.

    Args:
        urls: URLs handed to ``UnstructuredURLLoader``.
        chunk_size: Forwarded as ``chunk_size`` to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that used to be hard-coded.
        chunk_overlap: Forwarded as ``chunk_overlap`` to the splitter.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for the loaded documents.
    """
    documents = UnstructuredURLLoader(urls=urls).load()

    # Chunking parameters are exposed so callers can tune retrieval
    # granularity without editing this function.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

def create_vector_index(texts):
    """Embed ``texts`` and build a FAISS vector store from them.

    Args:
        texts: Documents passed straight through to ``FAISS.from_documents``.

    Returns:
        The object returned by ``FAISS.from_documents``.
    """
    # The embedding model is default-constructed; no model name is given here.
    return FAISS.from_documents(texts, HuggingFaceEmbeddings())

def save_vector_index(vector_index, filename='vector_index.pkl'):
    """Pickle ``vector_index`` to ``filename``.

    The file is opened in binary write mode, so any existing file at that
    path is overwritten.

    Args:
        vector_index: Object to serialise.
        filename: Destination path. Defaults to ``'vector_index.pkl'``.
    """
    with open(filename, mode='wb') as sink:
        pickle.Pickler(sink).dump(vector_index)

def load_vector_index(filename='vector_index.pkl'):
    """Read back the object pickled in ``filename``.

    Args:
        filename: Path of the pickle file. Defaults to ``'vector_index.pkl'``.

    Returns:
        The unpickled object.
    """
    # SECURITY: unpickling can execute arbitrary code -- only load files that
    # this notebook (or another trusted source) produced.
    with open(filename, mode='rb') as source:
        return pickle.Unpickler(source).load()

def create_qa_chain(api_key, vector_index, model_name="llama3-8b-8192",
                    temperature=0.9, max_tokens=500):
    """Build a question-answering-with-sources chain over ``vector_index``.

    A ``ChatGroq`` model is created with the caller's credentials and wired
    to the index's retriever via ``RetrievalQAWithSourcesChain.from_llm``.

    Args:
        api_key: Groq API key forwarded to ``ChatGroq``.
        vector_index: Object exposing ``as_retriever()``.
        model_name: Model identifier forwarded to ``ChatGroq``. Defaults to
            ``"llama3-8b-8192"``, the value that used to be hard-coded.
        temperature: Sampling temperature forwarded to ``ChatGroq``.
            Defaults to 0.9, the value that used to be hard-coded.
        max_tokens: Completion token limit forwarded to ``ChatGroq``.
            Defaults to 500, the value that used to be hard-coded.

    Returns:
        The chain returned by ``RetrievalQAWithSourcesChain.from_llm``.
    """
    # Only the caller-supplied key is passed. A second, hard-coded
    # ``groq_api_key`` placeholder used to be passed alongside it, which
    # supplied two conflicting values for the same credential.
    # NOTE(review): 0.9 is a high temperature for factual Q&A, and the default
    # model id should be checked against Groq's currently served models.
    llm = ChatGroq(
        api_key=api_key,
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    return RetrievalQAWithSourcesChain.from_llm(
        llm=llm,
        retriever=vector_index.as_retriever(),
    )

def ask_query(chain, query):
    """Send ``query`` to ``chain`` and return only the chain's outputs.

    Args:
        chain: Either an object exposing ``invoke(inputs, **kwargs)`` or a
            plain callable taking the same arguments.
        query: Question text, sent under the ``"question"`` key.

    Returns:
        Whatever the chain returns when run with ``return_only_outputs=True``.
    """
    payload = {"question": query}

    # Prefer ``invoke``: calling a LangChain chain object directly is the
    # deprecated entry point.
    run = getattr(chain, "invoke", None)
    if callable(run):
        return run(payload, return_only_outputs=True)

    # Backward-compatible path for plain callables without ``invoke``.
    return chain(payload, return_only_outputs=True)

In [5]:
def main(urls, query):
    """Run the whole pipeline: fetch, index, build the chain, ask.

    Args:
        urls: URLs passed to ``load_documents``.
        query: Question passed to ``ask_query``.

    Returns:
        The value returned by ``ask_query`` for the freshly built chain.
    """
    groq_key = set_groq_api_key()

    # Fetch + chunk the pages, then embed the chunks into a vector index.
    index = create_vector_index(load_documents(urls))

    chain = create_qa_chain(groq_key, index)
    answer = ask_query(chain, query)
    return answer

In [9]:
# Example run: index one article and ask a question about it.
# Pages to fetch and index (passed to main as `urls`).
urls = [
    "https://www.moneycontrol.com/news/business/earnings/indusind-bank-q2-net-profit-falls-40-on-year-to-rs-1331-crore-misses-expectations-12849613.html"
]

# Question put to the QA chain.
query = "How much net profit for Indusind bank ?"
    
# main() fetches the pages, rebuilds the vector index and queries the model on
# every call, so re-running this cell repeats all of that work.
result = main(urls, query)
print(result)

  embeddings = HuggingFaceEmbeddings()


{'answer': "FINAL ANSWER: IndusInd Bank's net profit for Q2 FY25 is Rs 1,331 crore.\n", 'sources': 'https://www.moneycontrol.com/news/business/earnings/indusind-bank-q2-net-profit-falls-40-on-year-to-rs-1331-crore-misses-expectations-12849613.html'}
