In [1]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain.llms import GooglePalm
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores import FAISS

In [2]:
#load openAI api key
os.environ['GOOGLE_API_KEY'] = 'AIzaSyDEbmf-A6xSsYW7FhFuvyfXo1ct8GPEBdw'

In [3]:
# Initialise LLM with required params
llm = GooglePalm(temperature=0.9, max_tokens=500) 

### (1) Load data

In [5]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/technology/rainmatter-backed-smallcase-looks-to-widen-its-investment-products-by-getting-into-mutual-funds-and-fixed-income-products-12021201.html"
])
data = loaders.load() 
len(data)

2

### (2) Split data to create chunks

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [7]:
len(docs)

44

In [8]:
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, LoginHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy Credit Score₹100 CashbackMy FeedMy MessagesMy AlertsMy Profile My PROMy PortfolioMy WatchlistMy Credit Score₹100 CashbackMy FeedMy MessagesMy AlertsLogoutChat with UsDownload AppFollow us on:\n\nPremium\n\nMy Feed', metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'})

### (3) Create embeddings for these chunks and save them to FAISS index

In [9]:
# Create the embeddings of the chunks using openAIEmbeddings
embeddings = GooglePalmEmbeddings()

# Pass the documents and embeddings inorder to create FAISS vector index
vectorindex_openai = FAISS.from_documents(docs, embeddings)

In [20]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Initialize instructor embeddings using the Hugging Face model
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
# Create a FAISS instance for vector database from 'docs'
vectordb = FAISS.from_documents(documents=docs,
                                 embedding=embeddings)

# vectorstore = Chroma.from_documents(documents=docs,embedding=embeddings)


In [21]:
# Storing vector index create in local
file_path="vector_index_palm.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectordb, f)

In [22]:
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [23]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorIndex.as_retriever())
chain



In [29]:
query = "what is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"
# query = "Tesla"

langchain.debug=True

# chain({"question": query}, return_only_outputs=True)
result = chain({"question": query}, return_only_outputs=True)
if not result:
    print("No result found")
else:
    print(result)


[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tiago iCNG?"
}


[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Qualcomm (QCOM.O) advanced 3.9% after the chipmaker signed a new deal with Apple (AAPL.O) to supply 5G chips to the iPhone maker until at least 2026.\n\nHostess Brands (TWNK.O) surged 19.1% after J. M. Smucker (SJM.N) said it would buy the Twinkies-maker in a $5.6 billion deal.\n\nAdvancing issues outnumbered falling ones within the S&P 500 (.AD.SPX) by a 1.5-to-one ratio.\n\nThe S&P 500 posted 14 new highs and 11 new lows; the Nasdaq recorded 36 new highs and 199 new lows.\n\nReuters\n\nTags:\n\n#International Markets\n\nfirst published: Sep 12, 2023 06:24 am\n\nCheck Free Credit Score on Moneycontrol: Easily track your loans, get insi

IndexError: list index out of range