### Retrival and Streamlit app


In [23]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import WebBaseLoader


In [24]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load variables from .env file

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("❌ OPENAI_API_KEY is missing. Set it in .env or as an environment variable.")



In [25]:
llm = OpenAI(temperature=0.9, max_tokens=500) 


### Load the browser info

In [26]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load() 
len(data)

2

In [34]:
data[0].page_content

'English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\nco-presented by\n\nassociated by\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nOptions FestWebinar\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topics\n\nSensex TodayUnited Breweries Share PriceZen Technologies Share PriceGlenmark Pharma Share Price Senco Gold Share PriceGodfrey Phillips Share Price\n\nWall Street rises as Tesla soars on AI optimism\n\nTesla (TSLA.O) rallied 10% after Morgan Stanley upgraded the electric car

In [35]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=25
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)
len(docs) #docs

30

In [None]:
# from langchain.document_loaders import WebBaseLoader

# loader = WebBaseLoader([
#     "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
#     "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
# ])

# data = loader.load()
# len(data)

In [None]:
# splitter = RecursiveCharacterTextSplitter(
#     #separators = ["\n\n","\n"," "], 
#     chunk_size = 1000,
#     chunk_overlap = 200
# )
# # As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
# chunks = splitter.split_documents(data) #every chunk is a documment type
# len(chunks)

In [37]:
docs[8].page_content

'Advisory Alert:\n\nFollow Us On:\n\nStocks: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Others\n\nTrending Topics'

In [38]:
# save the vector index
embeddings = OpenAIEmbeddings()
vector_index_openai = FAISS.from_documents(docs, embeddings) #vector store



AuthenticationError: Incorrect API key provided: sk-proj-********************************************************************************************************************************************************b-8A. You can find your API key at https://platform.openai.com/account/api-keys.

In [None]:
file_path = 'vector_index.pkl'

with open(file_path, 'wb') as f:
    pickle.dump(vector_index, f)

In [39]:
# Read the vector index from the file
file_path = 'vector_index.pkl'

if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)    




In [40]:
import os

api_key = os.getenv("OPENAI_API_KEY")
print(f"Loaded API Key: {api_key[:5]}...{api_key[-5:]}")  # Show first & last part (hide middle)


Loaded API Key: sk-pr...Lb-8A


### Retrieve similar embeddings for a given question and call LLM to retrieve final answer


In [41]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorIndex.as_retriever())
chain



In [42]:
import os

api_key = os.getenv("OPENAI_API_KEY")
print(f"Loaded API Key: {api_key[:5]}...{api_key[-5:]}")  # Show first & last part (hide middle)


Loaded API Key: sk-pr...Lb-8A


In [43]:
query = "what is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"

langchain.debug=True



In [47]:
llm

OpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.9, max_tokens=500, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', tiktoken_model_name=None)

In [44]:
chain({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tiago iCNG?"
}
[31;1m[1;3m[chain/error][0m [1m[1:chain:RetrievalQAWithSourcesChain] [390ms] Chain run errored with error:
[0m"AuthenticationError(message='Incorrect API key provided: sk-xJoAN***************************************sKHH. You can find your API key at https://platform.openai.com/account/api-keys.', http_status=401, request_id=None)"


AuthenticationError: Incorrect API key provided: sk-xJoAN***************************************sKHH. You can find your API key at https://platform.openai.com/account/api-keys.