In [1]:
import os
import pickle
import time

import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.globals import set_debug
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
# Completion model shared by the rest of the notebook
# (sampling temperature 0.9, responses capped at 500 tokens).
llm = OpenAI(temperature=0.9, max_tokens=500)

# Source articles that will be loaded and indexed.
article_urls = [
    "https://auto.hindustantimes.com/auto/electric-vehicles/tata-punch-ev-launched-at-starting-price-of-rs-10-99-lakh-gets-range-of-up-to-421-kms-41705468332021.html#:~:text=Tata%20Motors%20has%20launched%20the,for%20the%20top%2Dend%20variant.",
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
]
loaders = UnstructuredURLLoader(urls=article_urls)

# Load the articles; the cell output shows how many items came back.
data = loaders.load()
len(data)

In [None]:
# Chunking parameters: pieces of up to 1000 characters, with 200 characters
# shared between neighbouring pieces.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split the loaded articles; the cell output is the resulting chunk count.
docs = text_splitter.split_documents(data)
len(docs)

In [None]:
# Embed every chunk with OpenAI embeddings and build a FAISS index over them.
embeddings = OpenAIEmbeddings()
vectorindex_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Write the index to a local folder, then read it back from that same folder.
index_dir = "vectorstore"
vectorindex_openai.save_local(index_dir)

# allow_dangerous_deserialization=True is an explicit opt-in on load;
# NOTE(review): only appropriate for index files this notebook wrote itself.
x = FAISS.load_local(
    index_dir,
    OpenAIEmbeddings(),
    allow_dangerous_deserialization=True,
)

# Retriever built from the reloaded index; used when constructing the QA chain.
retriever = x.as_retriever()

In [None]:
# Question-answering chain that combines the LLM with the FAISS retriever.
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

# Display the chain object as the cell output for inspection.
chain

In [None]:
# Switch on LangChain's global debug mode before running the query cell.
# `set_debug` is imported in the imports cell at the top of the notebook.
# The older `langchain.debug = True` form does not work anymore; use
# `set_debug(True)` instead.
set_debug(True)

In [None]:
# Question to ask against the indexed articles.
query = "What is the price of Tata Punch EV?"

# Run the chain; return_only_outputs=True asks for the outputs without the inputs.
chain_inputs = {"question": query}
chain(chain_inputs, return_only_outputs=True)

In [None]:
# Store the vector index on local disk as a pickle (.pkl) file.
#
# A PKL file contains serialized Python objects. Pickle is a Python module
# that converts Python objects into a byte stream and later converts that
# byte stream back into Python objects. This is useful for storing Python
# objects in a file, or for sending them over a network.
#
# NOTE(review): whether the FAISS index object pickles cleanly depends on the
# installed LangChain version — confirm. `save_local` is already used earlier
# in this notebook as an alternative way to persist the index.
file_path = r"docs/vector_index.pkl"

# open(..., "wb") does not create missing parent folders, so make sure the
# target directory exists; otherwise a fresh checkout fails with
# FileNotFoundError.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

with open(file_path, "wb") as f:
    pickle.dump(vectorindex_openai, f)

In [None]:
# Reload the pickled vector index from disk, if it has been written.
file_path = r"docs/vector_index.pkl"

# Start from an explicit "not loaded" state so a missing file leaves
# vectorIndex as None instead of undefined (or stale from an earlier run).
vectorIndex = None

if os.path.exists(file_path):
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load a pickle that this notebook (or another trusted source) wrote.
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)