In [48]:
import os
import pickle
import time

import streamlit as st

# LLM
from langchain_openai import ChatOpenAI

# Embeddings
from langchain_openai import OpenAIEmbeddings

# Vector Store
from langchain_community.vectorstores import FAISS

# Document Loader
from langchain_community.document_loaders import UnstructuredURLLoader

# Text Splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Prompting, chain composition and the global debug switch
from langchain_core.globals import set_debug
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [49]:
# Publish the project's OpenAI key through the OPENAI_API_KEY environment variable.
# The key value lives in the local `secret_key` module rather than in this notebook.
# NOTE(review): the ChatOpenAI / OpenAIEmbeddings objects in this notebook are built
# without an explicit key, so they presumably read this variable — confirm.
import os

from secret_key import openapi_equity_research_analyst_project_key

os.environ["OPENAI_API_KEY"] = openapi_equity_research_analyst_project_key

In [50]:
# Chat model used as the final step of the retrieval chain to write the answer.
# No API key is passed here — presumably taken from the OPENAI_API_KEY environment
# variable set in the previous cell; confirm.
llm = ChatOpenAI(model="gpt-4o-mini")

In [51]:
# News articles that form the knowledge base for the question-answering chain.
article_urls = [
    "https://www.moneycontrol.com/news/business/earnings/q4-results-2025-live-updates-hul-axis-bank-nestle-india-sbi-life-tech-mahindra-macrotech-adani-energy-sbi-cards-persistent-q4-earnings-april-24-liveblog-13002888.html",
    "https://www.moneycontrol.com/news/business/markets/goldman-sachs-tops-wall-street-for-fourth-quarter-ecm-revenue-13775065.html",
]

# continue_on_failure=False: with the loader's default (True) a URL that cannot be
# fetched or parsed is only logged and skipped, so the chunks -- and the FAISS index
# cached on disk from them -- would silently be built from partial data.
# Failing here surfaces the real download/parsing error instead.
loaders = UnstructuredURLLoader(urls=article_urls, continue_on_failure=False)
data = loaders.load()

# Number of documents loaded (displayed as the cell output).
len(data)

2

In [52]:
# Split the loaded pages into overlapping chunks before embedding them.
# chunk_size / chunk_overlap are the splitter's size limit and the amount of text
# shared between neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

docs = text_splitter.split_documents(data)

# Report how many chunks were produced.
print(len(docs))

11


In [53]:
# Sanity check: display the first chunk (metadata and text) produced by the splitter.
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/goldman-sachs-tops-wall-street-for-fourth-quarter-ecm-revenue-13775065.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMy Alerts\n\nGo Ad-Free\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nLoan against MFs\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nEco Pulse\n\nFiDEX 2026\n\nFiDEX 2026\n\nTrending Topi

In [54]:
# Embedding client for the chunks. It is passed to FAISS.from_documents when the
# index is built and to FAISS.load_local when the index is read back from disk.

# No API key is passed here — presumably read from the OPENAI_API_KEY environment variable; confirm.
embeddings = OpenAIEmbeddings()


In [55]:
# Directory where the FAISS index is cached between runs.
index_path = "vector_index"

# FAISS.save_local writes two files into the folder (default index_name="index").
# Require both of them: checking only that the path exists treats an empty or
# half-written folder (or a stray file with that name) as a valid cache, and
# load_local then fails.
index_files = [
    os.path.join(index_path, file_name)
    for file_name in ("index.faiss", "index.pkl")
]
index_is_cached = all(os.path.isfile(file_path) for file_path in index_files)

if index_is_cached:
    # Reuse the cached index instead of re-embedding every chunk.
    # NOTE: the cache is NOT invalidated when the URLs or chunking parameters change;
    # delete the `vector_index` folder to force a rebuild.
    # allow_dangerous_deserialization=True lets load_local unpickle index.pkl. That is
    # acceptable only because this notebook wrote the folder itself -- never point
    # index_path at a directory from an untrusted source.
    vectorindex_openai = FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )
else:
    # No usable cache: embed the chunks, build the index and persist it.
    vectorindex_openai = FAISS.from_documents(docs, embeddings)
    vectorindex_openai.save_local(index_path)

In [56]:
# Load the saved index from disk into `vectorIndex`, the handle the retriever is
# built from. The folder comes from `index_path` (defined in the previous cell)
# rather than a second hard-coded copy of the name, so the save and load locations
# cannot drift apart.
# allow_dangerous_deserialization=True permits unpickling the stored index; only
# use it on an index folder this notebook created itself.
vectorIndex = FAISS.load_local(
    index_path,
    embeddings,
    allow_dangerous_deserialization=True,
)

In [61]:
# Retriever over the FAISS index, using as_retriever()'s default search settings.
retriever = vectorIndex.as_retriever()

# Enable LangChain's global debug tracing for the chain run below.
# set_debug comes from langchain_core.globals; the bare `langchain` package is not
# imported in this notebook, so `langchain.debug = True` would raise NameError on a
# fresh kernel.
set_debug(True)


def format_docs(retrieved_docs):
    """Render retrieved chunks as plain text for the prompt's context slot.

    Without this step the list of Document objects is interpolated into the
    prompt as a Python repr (escaped newlines, constructor syntax), which is
    noisy input for the model.

    Args:
        retrieved_docs: iterable of Document objects returned by the retriever.

    Returns:
        A single string with one "Source: <url>" header plus the chunk text per
        document, separated by blank lines. Empty string if nothing was retrieved.
    """
    return "\n\n".join(
        f"Source: {doc.metadata.get('source', 'unknown')}\n{doc.page_content}"
        for doc in retrieved_docs
    )


# Prompt: restrict the model to the retrieved context.
prompt = ChatPromptTemplate.from_template(
    """
Answer the question using ONLY the provided context.
If the answer is not in the context, say you don't know.

Context:
{context}

Question:
{question}
"""
)

# Build retrieval chain manually: the input question is sent to the retriever
# (whose results are formatted into text) and also passed through unchanged,
# then both fill the prompt, which is sent to the chat model.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
)

# Run
question = "What is the revenue of Goldman Sachs"
response = rag_chain.invoke(question)

print(response.content)

Goldman Sachs reported $521 million of equity underwriting revenue in the final three months of 2025.
