In [12]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment, then
# read the key that is later passed to the Groq chat model.
# API_KEY is None when the variable is not set.
_ = load_dotenv()
API_KEY = os.environ.get("API_KEY")
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
import tqdm as notebook_tqdm
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
import pickle
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
import langchain
# from langchain.chains.qa_with_sources import load_qa_with_sources_chain

- LLM

In [2]:
# Groq-hosted chat model used by the question-answering chain below.
# The settings live in one mapping so they are easy to scan and adjust.
# NOTE(review): temperature 0.9 looks high for source-grounded Q&A — confirm it is intentional.
llm_settings = {
    "model": "meta-llama/llama-4-maverick-17b-128e-instruct",
    "temperature": 0.9,
    "max_tokens": 500,
    "groq_api_key": API_KEY,
}
llm = ChatGroq(**llm_settings)

- Web scraping

In [3]:
# News articles to ingest; the list is kept separate from the loader so
# sources can be added or removed in one place.
article_urls = [
    'https://www.moneycontrol.com/world/why-tesla-is-on-trial-over-a-fatal-autopilot-crash-and-what-it-means-for-the-future-of-driver-assist-technology-article-13298390.html',
    'https://economictimes.indiatimes.com/news/company/corporate-trends/tata-is-now-riding-the-new-wave-what-lies-ahead/articleshow/122878653.cms?from=mdr',
]
loader = UnstructuredURLLoader(urls=article_urls)

# Fetch and parse the pages; the cell displays how many documents came back.
data = loader.load()
len(data)

2

In [4]:
# print(data[0].page_content)

In [5]:
# Chunking parameters for the retrieval index.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# The splitter's default separators are used; a custom list is kept for reference:
# separators=['\n\n', '\n' , ' ', '.']
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split the loaded articles into overlapping chunks and display the chunk count.
docs = text_splitter.split_documents(data)
len(docs)

51

* embedding

In [6]:
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# vectorindex = FAISS.from_documents(documents=docs, embedding=embeddings)


* store the vector index in pickle format

In [7]:
# with open("vector_index.pkl", "wb") as f:
#     pickle.dump(vectorindex, f)


* load the cached vector index from vector_index.pkl

In [8]:
# Obtain the FAISS vector index: reuse the on-disk cache when it exists,
# otherwise build it from `docs` and write the cache. Without the fallback,
# `vectorindex` would be undefined on a machine that lacks the cache file and
# the chain-construction cell would fail with a NameError.
INDEX_PATH = 'vector_index.pkl'

if os.path.exists(INDEX_PATH):
    # SECURITY: pickle.load can execute arbitrary code. Only load a cache
    # file that this notebook itself produced; never one from an untrusted source.
    with open(INDEX_PATH, 'rb') as f:
        vectorindex = pickle.load(f)
else:
    # Cache miss: embed the chunks and build the index, then persist it so
    # later runs take the fast path above.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorindex = FAISS.from_documents(documents=docs, embedding=embeddings)
    with open(INDEX_PATH, 'wb') as f:
        pickle.dump(vectorindex, f)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# qa_chain = load_qa_with_sources_chain(llm=llm, chain_type="stuff")
# chain = RetrievalQAWithSourcesChain(
#     combine_documents_chain=qa_chain,
#     retriever=vectorindex.as_retriever()
# )


In [10]:
# Wrap the vector index in a retriever, then assemble the
# question-answering-with-sources chain around the chat model.
retriever = vectorindex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

# Print the assembled chain for inspection.
print(chain)



In [14]:
query = 'where The crash occurred when a Tesla driven by George McGee, slammed into the couple as they crossed a street.'

# Turn on LangChain's debug tracing so every chain and LLM step is logged below.
langchain.debug = True

# Calling the chain object directly (`chain({...})`) goes through the deprecated
# `Chain.__call__`; `invoke` is the supported entry point and accepts the
# same `return_only_outputs` flag, so the returned dict is unchanged.
chain.invoke({'question': query}, return_only_outputs=True)

  chain({'question':query} ,return_only_outputs=True)
  return forward_call(*args, **kwargs)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "where The crash occurred when a Tesla driven by George McGee, slammed into the couple as they crossed a street."
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "A crash, a loss, and a courtroom confrontation\n\nStory continues below Advertisement\n\nRemove Ad\n\nThe crash occurred in Key Largo when a Tesla, driven by George McGee, slammed into the couple as they crossed a street. McGee admitted he was looking down to retrieve his phone with Autopilot engaged and missed a stop sign. Benavides Leon was found dead in nearby bushes, while Angulo was severely injured. The vic

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Token indices sequence length is longer than the specified maximum sequence length for this model (2232 > 1024). Running this sequence through the model will result in indexing errors


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "where The crash occurred when a Tesla driven by George McGee, slammed into the couple as they crossed a street.",
  "summaries": "Content: The crash occurred in Key Largo when a Tesla, driven by George McGee, slammed into the couple as they crossed a street. \n\nThis is the relevant text, verbatim. It matches the question almost exactly.\nSource: https://www.moneycontrol.com/world/why-tesla-is-on-trial-over-a-fatal-autopilot-crash-and-what-it-means-for-the-future-of-driver-assist-technology-article-13298390.html\n\nContent: Here's the relevant text verbatim:\n \n\"In a rare and emotional federal courtroom trial unfolding in Miami, Tesla is facing intense scrutiny over the role its Autopilot system played in a 2019 crash that killed 22-year-old Naibel Benavides Leon and left her boyfriend, Dillon Angulo, with life-alt

{'answer': 'FINAL ANSWER: The crash occurred in Key Largo.\n',
 'sources': 'https://www.moneycontrol.com/world/why-tesla-is-on-trial-over-a-fatal-autopilot-crash-and-what-it-means-for-the-future-of-driver-assist-technology-article-13298390.html'}