In [7]:
import os
import pickle
import time

import langchain
import langchain_openai
import streamlit as st
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.globals import set_debug
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings

In [13]:
# Read the OpenAI API key from the local (untracked) secret_key module and
# expose it through the environment variable the OpenAI clients look up.
from secret_key import openapi_key

os.environ["OPENAI_API_KEY"] = openapi_key

In [26]:
# Initialise the completion LLM used to answer questions.
# Use the OpenAI class from the dedicated `langchain_openai` package (the same
# package the embeddings come from) rather than the deprecated class exposed
# via `from langchain import OpenAI`.
# NOTE(review): temperature=0.9 is high for factual question answering over
# retrieved sources — consider lowering it if answers drift from the articles.
llm = langchain_openai.OpenAI(temperature=0.9, max_tokens=500)

  llm = OpenAI(temperature=0.9, max_tokens=500)


Load Data

In [27]:
# Articles to load; each URL becomes one Document.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]
loaders = UnstructuredURLLoader(urls=article_urls)

# Fetch and parse the pages, then report how many documents were loaded.
data = loaders.load()
len(data)

2

Split data to create chunks

In [28]:
# Splitter configured with chunk_size=1000 and an overlap of 200 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds Document objects, so split_documents (rather than
# split_text) can be applied to it directly to produce the chunks.
docs = text_splitter.split_documents(data)
len(docs)

4

In [29]:
# Peek at the first chunk to sanity-check what the loader actually captured
# (its source URL in the metadata and the start of the page text).
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content="USER CONSENT\n\nWe at moneycontrol use cookies and other tracking technologies to assist you with navigation and determine your location. We also capture cookies to obtain your feedback, analyse your use of our products and services and provide content from third parties. By clicking on 'I Accept', you agree to the usage of cookies and other tracking technologies. For more details you can refer to our cookie policy.\n\nWe collect cookies for the functioning of our website and to give you the best experience. This includes some essential cookies.\n\nCookies from third parties which may be used for personalization and determining your location. By clicking 'I Accept', you agree to the usage of cookies to enhance your personalized experience on our site. For more details you can refer to our cookie policy\n\nI agree to the updated 

Create embeddings for these chunks and save them to a FAISS index

In [30]:
# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# Build the FAISS vector index from the chunks and the embedding model.
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

In [31]:
# Persist the vector index to the local "faiss_index" folder.
vectorindex_openai.save_local(folder_path="faiss_index")

In [32]:
# Reload the vector index from the local "faiss_index" folder.
# allow_dangerous_deserialization=True opts in to pickle-based loading; that is
# only acceptable because this notebook wrote the folder itself in the previous
# cell. Never point this at an index obtained from an untrusted source.
vectorindex_openai = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

Retrieve the chunks most similar to a given question and call the LLM to generate the final answer.

In [33]:
# Expose the FAISS index as a retriever and combine it with the LLM into a
# question-answering chain that also reports its sources.
retriever = vectorindex_openai.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain



In [34]:
query = "What is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"

# Enable LangChain's global debug mode through the supported setter (instead of
# assigning to the `langchain.debug` module attribute) so that every
# intermediate chain/LLM step is logged in the cell output.
set_debug(True)

# Calling the chain object directly is deprecated; `invoke` is the supported
# entry point and accepts the same `return_only_outputs` flag, so the result is
# still just the {'answer': ..., 'sources': ...} dictionary.
chain.invoke({"question": query}, return_only_outputs=True)


  chain({"question": query}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What is the price of Tiago iCNG?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "I agree to the updated privacy policy and I warrant that I am above 16 years of age\n\nI agree to the processing of my personal data for the purpose of personalised recommendations on financial and similar products offered by MoneyControl\n\nI agree personalized advertisements and any kind of remarketing/retargeting on other third party websites\n\nI agree to receive direct marketing communications via Emails and SMS\n\nPlease select (*) all mandatory conditions to continue.\n\nPage Generat

{'answer': ' The price of Tiago iCNG is Rs. 7.1 lakh.\n',
 'sources': 'https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}