In [14]:
%pip install streamlit



In [15]:
%pip install langchain-community



In [16]:
pip install unstructured



In [18]:
%pip install langchain-groq

Collecting langchain-groq
  Downloading langchain_groq-0.3.8-py3-none-any.whl.metadata (2.6 kB)
Collecting groq<1,>=0.30.0 (from langchain-groq)
  Downloading groq-0.32.0-py3-none-any.whl.metadata (16 kB)
Downloading langchain_groq-0.3.8-py3-none-any.whl (16 kB)
Downloading groq-0.32.0-py3-none-any.whl (135 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-groq
Successfully installed groq-0.32.0 langchain-groq-0.3.8


In [49]:
import os
import streamlit as st
import pickle
import time

# LangChain core
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# ✅ Import Groq LLM wrapper
from langchain_groq import ChatGroq

In [74]:
os.environ['GROQ_API_KEY'] = "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"

In [75]:
from langchain_groq import ChatGroq
import os

llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="meta-llama/llama-guard-4-12b",  # updated model name
    temperature=0.3,
    max_tokens=50
)

In [76]:
from langchain.document_loaders import UnstructuredURLLoader

# ✅ Load web pages into documents
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load()

# Check number of documents
print(len(data))

2


In [77]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [78]:
len(docs)

18

In [79]:
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topic

In [80]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Create the embeddings of the chunks using HuggingFace
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Pass the documents and embeddings in order to create FAISS vector index
vectorindex = FAISS.from_documents(docs, embeddings)

In [81]:
%pip install faiss-cpu



In [82]:
import pickle

# Storing FAISS vector index locally
file_path = "/content/vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex, f)

In [83]:
from langchain.vectorstores import FAISS

file_path = "/content/faiss_index"

# Load FAISS index if it exists
if os.path.exists(file_path):
    vectorIndex = FAISS.load_local(file_path, embeddings, allow_dangerous_deserialization=True)
else:
    vectorIndex = FAISS.from_documents(docs, embeddings)
    vectorIndex.save_local(file_path)

In [84]:
from langchain.chains import RetrievalQA

chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorIndex.as_retriever(),
    chain_type="stuff"
)

In [85]:
query = "what is the net sales in march 2025?"
# query = "what are the main features of nalco?"

langchain.debug=True

chain({"query": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "what is the net sales in march 2025?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "what is the net sales in march 2025?",
  "context": "Manappuram Finance Q1 Results\n\nTata Motors Q1 results\n\nTitan Q1 Preview\n\nSBI Q1 Preview\n\nNALCO Consolidated March 2025 Net Sales at Rs 5,267.83 crore, up 47.19% Y-o-Y\n\nBroker Research\n\nJune 24, 2025 / 13:50 IST\n\njoin Us on WhatsApp\n\nFollow Us On Google\n\nAdd as a Preferred Source on Google\n\n\n\nWatchlist\n\nPortfolio\n\nMessage\n\nSet Alert\n\n26 Aug, 2025 12:21\n\nVolume\n\nTodays L/H\n\nMore\n\nReported Consolidated quarterly numbers for National Aluminium Company are:\n\nNet Sales at Rs 5,267.83 c

{'result': 'safe'}