In [5]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [6]:
#load openAI api key
os.environ['OPENAI_API_KEY'] = 'OPEN AI KEY'

In [8]:
# Initialise LLM with required params
llm = OpenAI(temperature=0.9, max_tokens=500) 

### (1) Load data

In [9]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load() 
len(data)

2

### (2) Split data to create chunks

In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [11]:
len(docs)

34

In [12]:
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, Login Hello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistFREE Credit Score₹100 Cash RewardMy AlertsMy MessagesPrice AlertsMy Profile My PROMy PortfolioMy WatchlistFREE Credit Score₹100 Cash RewardMy AlertsMy MessagesPrice AlertsLogoutChat with UsDownload AppFollow us on:\n\nGo Ad-Free\n\nMy Alerts', metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'})

### (3) Create embeddings for these chunks and save them to FAISS index

In [14]:
# Create the embeddings of the chunks using openAIEmbeddings
embeddings = OpenAIEmbeddings()

# Pass the documents and embeddings inorder to create FAISS vector index
vectorindex_openai = FAISS.from_documents(docs, embeddings)

In [25]:
# Storing vector index create in local
file_path="vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_openai, f)

TypeError: cannot pickle 'SSLContext' object

In [20]:
import jsonpickle

file_path = "vector_index.json"
with open(file_path, "w") as f:
    json_str = jsonpickle.encode(vectorindex_openai)
    f.write(json_str)

# To deserialize:
with open(file_path, "r") as f:
    json_str = f.read()
    vectorindex_openai = jsonpickle.decode(json_str)


In [21]:
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

UnpicklingError: invalid load key, '{'.

In [22]:
# Deserialize the object from JSON formatted string
file_path = "vector_index.json"
if os.path.exists(file_path):
    with open(file_path, "r") as f:
        json_str = f.read()
        vectorIndex = jsonpickle.decode(json_str)

### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [23]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorIndex.as_retriever())
chain



In [24]:
query = "what is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"

langchain.debug=True

chain({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the price of Tiago iCNG?"
}
[31;1m[1;3m[chain/error][0m [1m[chain:RetrievalQAWithSourcesChain] [10ms] Chain run errored with error:
[0m"AttributeError(\"'Embeddings' object has no attribute '_post'\")Traceback (most recent call last):\n\n\n  File \"/Users/rohansaxena/Desktop/AI Engineer/Week 31-32 - LLM & Langchain/langchain main 2_news_research_tool_project/myenv/lib/python3.10/site-packages/langchain/chains/base.py\", line 156, in invoke\n    self._call(inputs, run_manager=run_manager)\n\n\n  File \"/Users/rohansaxena/Desktop/AI Engineer/Week 31-32 - LLM & Langchain/langchain main 2_news_research_tool_project/myenv/lib/python3.10/site-packages/langchain/chains/qa_with_sources/base.py\", line 152, in _call\n    docs = self._get_docs(inputs, run_manager=_run_manager)\n\n\n  File \"/Users/rohansaxena/Desktop/AI Engineer/Week 31-32 - LLM & Langchain/lang

  warn_deprecated(


AttributeError: 'Embeddings' object has no attribute '_post'