In [47]:
import os
import streamlit as st
import pickle
import time

from dotenv import load_dotenv
load_dotenv()

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


In [83]:
# Load the OpenAI API key without hardcoding it in the notebook.
import os
from getpass import getpass

PLACEHOLDER_API_KEY = "your-openai-api-key-here"


def resolve_openai_api_key(env=None, prompt=getpass):
    """Make sure ``env["OPENAI_API_KEY"]`` holds a usable key and return it.

    A key that is already present (for example one loaded from ``.env`` by
    ``load_dotenv()``) is reused rather than overwritten. Only when it is
    missing, blank, or still the template placeholder is ``prompt`` called
    to ask for one.

    Args:
        env: Mapping to read and update; defaults to ``os.environ``.
        prompt: Callable that takes a prompt string and returns the typed
            key; defaults to ``getpass.getpass`` so the key is not echoed.

    Returns:
        The API key with surrounding whitespace removed.

    Raises:
        ValueError: If no real key is available even after prompting.
    """
    env = os.environ if env is None else env
    key = (env.get("OPENAI_API_KEY") or "").strip()
    if not key or key == PLACEHOLDER_API_KEY:
        key = (prompt("OpenAI API key: ") or "").strip()
    if not key or key == PLACEHOLDER_API_KEY:
        raise ValueError(
            "OPENAI_API_KEY is not set; add it to .env or enter it when prompted."
        )
    env["OPENAI_API_KEY"] = key
    return key


# __name__ is "__main__" inside a notebook kernel, so the cell still runs there,
# while importing this code as a module never triggers an interactive prompt.
if __name__ == "__main__":
    resolve_openai_api_key()


In [58]:
# Initialise the LLM with the required params.
# ChatOpenAI is the OpenAI wrapper this notebook actually imports; the bare
# `OpenAI` name used here before was never imported and raised NameError on a
# fresh kernel.
llm = ChatOpenAI(temperature=0.9, max_tokens=500)

### (1) Load data

In [59]:
# Moneycontrol articles that make up the knowledge base for this notebook.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]

# Fetch and parse every URL; the last line displays how many documents came back.
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()
len(data)

2

### (2) Split data to create chunks

In [60]:
# Splitter settings: a chunk size of 1000 with an overlap of 200 between
# consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds Document objects, so split_documents() is used here
# rather than split_text().
docs = text_splitter.split_documents(data)

In [61]:
# Number of chunks produced by the splitter.
len(docs)

106

In [62]:
# Peek at the first chunk to sanity-check what the loader extracted.
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nTrending Stocks\n\nOrkla India INE16NZ01023, ORKLAINDIA, 544595\n\nSuzlon Energy INE040H01021, SUZLON, 532667\n\nOla Electric INE0LXG01040, OLAELEC, 544225\n\nLG Electronics INE324D01010, LGEINDIA, 544576\n\nVodafone Idea INE669E01016, IDEA, 532822\n\n\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nTopic\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, Login Hello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy AlertsMy MessagesPrice AlertsMy Profile My PROMy PortfolioMy WatchlistMy AlertsMy MessagesPrice AlertsLogoutLoans up to â‚¹50 LAKHSFixed DepositsCredit CardsLifetime Free Credit ScoreChat with UsDownload AppFollow us on:\n\nGo Ad-Free\n\nMy Alerts')

### (3) Create embeddings for these chunks and save them to a FAISS index

In [85]:
# Read the key from the environment rather than embedding a credential in the
# notebook source. The value is None when OPENAI_API_KEY is not set.
openai_key = os.environ.get("OPENAI_API_KEY")

In [86]:
# Embedding model, authenticated with the key defined in the previous cell.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

# Embed every chunk in `docs` and build the FAISS vector index from the result.
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

In [89]:
# Persist the index to the local "faiss_index" folder.
vectorindex_openai.save_local(folder_path="faiss_index")


In [94]:
# Reload the index from the "faiss_index" folder with the same embedding model.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved files. That is acceptable only because this notebook wrote the folder
# itself; do not enable it for an index obtained from an untrusted source.
vectorindex_openai = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


In [95]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI

# Retriever view over the FAISS index.
retriever = vectorindex_openai.as_retriever()


def format_docs(documents):
    """Join the text of the retrieved chunks into one prompt-ready string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr (metadata, quotes and escaped newlines included)
    instead of the article text.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string,
            i.e. the results returned by the retriever.

    Returns:
        The chunk texts separated by blank lines; "" when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# Prompt with two slots: the retrieved context and the user's question.
prompt = ChatPromptTemplate.from_template(
    "Answer the question based on the context:\n\n{context}\n\nQuestion: {question}"
)

# Document chain: prompt -> chat model -> plain string.
# Note: this rebinds `llm`, replacing the temperature=0.9 model created earlier
# in the notebook with a temperature=0 one.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=openai_key)
document_chain = prompt | llm | StrOutputParser()

# Fan the incoming question out: one branch retrieves and formats the context,
# the other passes the question through unchanged.
retriever_chain = RunnableParallel({
    "context": retriever | format_docs,
    "question": RunnablePassthrough()
})

# Final chain: question string in, answer string out.
retrieval_chain = retriever_chain | document_chain


In [96]:
# Smoke test: ask a broad question and print the chain's plain-text answer.
query = "What is the article about?"
response = retrieval_chain.invoke(query)
print(response)


The article is about the rise of Wall Street as Tesla soars on AI optimism.


In [98]:
# Targeted question about the Tesla article.
retrieval_chain.invoke("What is Tesla's Dojo supercomputer?")

"Tesla's Dojo supercomputer is a technology that could boost the company's market value by nearly $600 billion, according to Morgan Stanley."

### (4) Retrieve similar embeddings for a given question and call the LLM to generate the final answer

In [99]:
# Ask for the reasoning behind Morgan Stanley's Tesla upgrade.
retrieval_chain.invoke("Why did Morgan Stanley upgrade Tesla?")

"Morgan Stanley upgraded Tesla because they believed that its Dojo supercomputer could boost the company's market value by nearly $600 billion."

In [100]:
# Question aimed at the second (Tata Motors) article.
retrieval_chain.invoke("what is the price of Tiago iCNG?")

'The price of Tiago iCNG ranges between Rs 6.55 lakh and Rs 8.1 lakh.'