In [1]:
import os
from dotenv import load_dotenv
# Load environment variables via python-dotenv before anything reads them with
# os.getenv below. As the last expression of the cell, the call's return value
# is what the cell displays.
load_dotenv()

True

In [2]:
# Groq credential looked up in the process environment; this is None when the
# GROQ_API_KEY variable is not set.
groq_api_key = os.environ.get('GROQ_API_KEY')

In [21]:
from langchain_groq import ChatGroq

# Chat model client that the QA chain uses to generate answers.
# NOTE(review): confirm 'llama3-8b-8192' is still a model id served by Groq.
llm = ChatGroq(
    model='llama3-8b-8192',
    groq_api_key=groq_api_key,
)

### 1) URL loader

In [27]:
# Import the loader from langchain_community, matching the FAISS import used
# later in this notebook; `langchain.document_loaders` is only a deprecated
# re-export of the same class.
from langchain_community.document_loaders import UnstructuredURLLoader

# Moneycontrol news article pages to load and index. Each URL is kept verbatim
# (including any `#...` fragment) because the loaded documents record it as
# their `source` metadata.
urls = [
    "https://www.moneycontrol.com/news/business/companies/honeywell-automation-indias-q2-profit-falls-on-sluggish-demand-12854355.html#goog_rewarded",
    "https://www.moneycontrol.com/news/business/companies/torrent-pharma-promoters-likely-to-sell-shares-worth-up-to-rs-3000-cr-via-block-deals-reports-12854399.html",
    "https://www.moneycontrol.com/news/business/markets/federal-bank-q2-fy25-welcoming-the-new-boss-with-a-steady-show-12853544.html#goog_rewarded"
]

In [28]:
# Build a loader over the article URLs, then fetch them into a list of
# Document objects (page text plus `source` metadata).
loader = UnstructuredURLLoader(
    urls=urls,
)
data = loader.load()

### 2) Text splitter

In [29]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter targeting 1000-character chunks with no overlap. The separators are
# listed from coarsest (blank line) to finest (comma).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
    separators=['\n\n', '\n', '.', ','],
)

# Split every loaded page into chunk-level Document objects.
chunks = splitter.split_documents(data)

In [41]:
# Preview only the first few chunks. Displaying the entire list floods the
# notebook output (much of it site navigation text) and bloats the saved file.
chunks[:3]

[Document(metadata={'source': 'https://www.moneycontrol.com/news/business/companies/honeywell-automation-indias-q2-profit-falls-on-sluggish-demand-12854355.html#goog_rewarded'}, page_content="English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nLoans up to ₹15 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nLoans up to ₹15 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_BUSINESS_AS/MC_ENG_ROS_NWS_BUS_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusiness

### 3) Building vector embeddings

In [30]:
# Import from langchain_community, matching the FAISS import used later in this
# notebook; `langchain.embeddings` is only a deprecated re-export.
from langchain_community.embeddings import HuggingFaceEmbeddings
# NOTE(review): SentenceTransformer is not referenced in the visible cells;
# drop this import if nothing else in the notebook uses it.
from sentence_transformers import SentenceTransformer

In [31]:

# Embedding model wrapper used to vectorise text for the FAISS index.
hf_embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

### 4) Store the embeddings in a vector DB (FAISS)

In [32]:
from langchain_community.vectorstores import FAISS

In [33]:
# Index the split `chunks`, not the raw pages in `data`. Embedding whole pages
# leaves the splitter's output unused, gives one vector per article (dominated
# by the navigation text at the top of each page, since the sentence-transformer
# truncates long inputs), and makes the retriever hand entire pages to the LLM.
vector_db = FAISS.from_documents(documents=chunks, embedding=hf_embeddings)

In [34]:
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain

# Question-answering chain that retrieves documents from the FAISS index and
# passes them to the LLM.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vector_db.as_retriever(),
)
            

In [48]:
# Question posed to the retrieval QA chain.
query = "what is the article about?"

In [49]:
# Run the chain through `invoke`; calling the chain object directly
# (`chain(...)`) is the deprecated `Chain.__call__` entry point.
# `return_only_outputs=True` keeps the input question out of the result dict,
# exactly as before.
result = chain.invoke({"question": query}, return_only_outputs=True)

In [50]:
# Display the generated answer text from the chain's output dict.
result["answer"]

'The article is about the quarterly performance of various companies, specifically Federal Bank, Honeywell Automation India, and Torrent Pharma.\n\n'