In [None]:
pip install python-dotenv

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
# (presumably the OpenAI API key used by the cells below — TODO confirm).
load_dotenv()

In [None]:
import time
import langchain
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Set up the OpenAI completion model that the retrieval chain will query.
# NOTE(review): 0.9 is a fairly high sampling temperature for factual Q&A over
# sources; answers may vary between runs — confirm this is intended.
llm = OpenAI(
    temperature=0.9,  # sampling temperature
    max_tokens=500,   # upper bound on the length of each completion
)

# Load data

In [None]:
# Articles to fetch and index.
urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
]

# Download each page and parse it into a document.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()
print(f"Loaded {len(data)} documents")

# The loader can skip URLs it fails to fetch or parse instead of raising, so a
# short result would otherwise go unnoticed until the answers look wrong.
if len(data) < len(urls):
    print(
        f"Warning: only {len(data)} of {len(urls)} URLs produced a document; "
        "check network access and the URLs above."
    )

# Split data to create chunks

In [None]:
# Splitter configured for chunks of size 1000 with an overlap of 200 between
# neighbouring chunks, so context that straddles a boundary is not lost.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds document objects, so split_documents can be applied to
# it directly (split_text would need plain strings instead).
docs = text_splitter.split_documents(data)
print(f"Created {len(docs)} document chunks")

# Create embeddings for these chunks and save them to a FAISS index

In [None]:
# Create embeddings and build a new FAISS index from the chunks (uncomment both lines below to run)
# embeddings = OpenAIEmbeddings()
# vectorindex_openai = FAISS.from_documents(docs, embeddings)

In [None]:
# Save the vector index to the local "faiss_index" path (uncomment to save a new index)
# vectorindex_openai.save_local("faiss_index")

In [None]:
# Reuse the saved FAISS index when one exists; otherwise build it from the
# chunks and save it, so the notebook also runs end-to-end on a fresh checkout.
INDEX_DIR = "faiss_index"
embeddings = OpenAIEmbeddings()

# save_local writes "<index_name>.faiss" (index_name defaults to "index") into the folder.
if os.path.isfile(os.path.join(INDEX_DIR, "index.faiss")):
    # SECURITY: loading the index unpickles data stored next to it. Keep
    # allow_dangerous_deserialization=True only for index files this notebook
    # created itself, never for files from an untrusted source.
    vectorIndex = FAISS.load_local(INDEX_DIR, embeddings, allow_dangerous_deserialization=True)
    print("Vector index loaded successfully")
else:
    # No saved index yet: embed the chunks (calls the embeddings API) and save the result.
    vectorIndex = FAISS.from_documents(docs, embeddings)
    vectorIndex.save_local(INDEX_DIR)
    print("Vector index created and saved successfully")

In [None]:
# Expose the vector index as a retriever, then build the question-answering
# chain that answers from the retrieved chunks and reports their sources.
retriever = vectorIndex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)
print("Retrieval chain created successfully")

In [None]:
# Sample questions about the indexed articles.
queries = [
    "what is the price of Tiago iCNG?",
    "what are the main features of punch iCNG?",
    "Which company builds safe cars Tesla or Tata Motors?",
    "what percentage will the central bank hold its interest rates at current levels at its September meeting?",
]

# Pick which question to ask; change the index to try another one.
query_index = 0
query = queries[query_index]
print(f"Query: {query}")

# Ask the chain; return_only_outputs=True keeps the inputs out of the result dict.
chain_inputs = {"question": query}
result = chain(chain_inputs, return_only_outputs=True)

# Show the answer followed by the sources it was drawn from.
answer_text = result['answer']
source_text = result['sources']
print(f"\nAnswer: {answer_text}")
print(f"Sources: {source_text}")