In [None]:
# Install core LangChain ecosystem
!pip install -q -U langchain langchain-core langchain-community langchain-classic
!pip install -q -U langchain-text-splitters langchain-groq langchain-huggingface

# Install document loaders and parsing dependencies
!pip install -q -U unstructured python-magic beautifulsoup4 lxml requests

# Install vector database
!pip install -q -U faiss-cpu

# Install embedding model dependencies
!pip install -q -U sentence-transformers transformers torch

print("All dependencies installed successfully")

This is step 1 where we have to load the documents using the URL


In [2]:
loader=UnstructuredURLLoader(urls=[
    "https://economictimes.indiatimes.com/markets/stocks/news/silver-etfs-rally-up-to-17-in-dramatic-reversal-but-will-100-barrier-throw-bulls-out-of-the-game/articleshow/127238052.cms?from=mdr",
    "https://english.elpais.com/international/2026-02-25/trumps-plans-for-cubans-in-the-us-maybe-they-want-to-go-back-theyre-going-to-have-that-choice.html"
])

In [3]:
data=loader.load()

In [4]:
len(data)

2

In [5]:
data[0].metadata

{'source': 'https://economictimes.indiatimes.com/markets/stocks/news/silver-etfs-rally-up-to-17-in-dramatic-reversal-but-will-100-barrier-throw-bulls-out-of-the-game/articleshow/127238052.cms?from=mdr'}

This is step 2 where we have to split the chunks obtained from the document

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

r_splitter=RecursiveCharacterTextSplitter(
    separators=["\n\n"],\
    chunk_size=200,
    chunk_overlap=0
)
chunks = r_splitter.split_documents(data)
len(chunks)

144

In [7]:
data[1]

Document(metadata={'source': 'https://english.elpais.com/international/2026-02-25/trumps-plans-for-cubans-in-the-us-maybe-they-want-to-go-back-theyre-going-to-have-that-choice.html'}, page_content='cuba\n\nTrump’s plans for Cubans in the US: ‘Maybe they want to go back. They’re going to have that choice’\n\nBesides the economic blockade, the first deportation flight back to the island in decades suggests that reverse migration is also factored into the Republican administration’s calculations\n\nCarla Gloria Colomé\n\nMiami - Feb 25, 2026 - 10:53CET\n\nShare on Whatsapp\n\nShare on Facebook\n\nShare on Twitter\n\nShare on Bluesky\n\nShare on Linkedin\n\nCopy link\n\nWithout elaborating on a plan, President Donald Trump has hinted in recent weeks at his administration’s position on Cuba: he began by repeatedly calling it a “failed nation” that “will fall very soon,” and has ended by speculating about a possible dialogue with the Cuban leadership. Amidst all the anticipation surrounding 

In [17]:
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    api_key=""
)


In [18]:
loader=UnstructuredURLLoader(urls=[
    "https://economictimes.indiatimes.com/markets/stocks/news/silver-etfs-rally-up-to-17-in-dramatic-reversal-but-will-100-barrier-throw-bulls-out-of-the-game/articleshow/127238052.cms?from=mdr",
    "https://english.elpais.com/international/2026-02-25/trumps-plans-for-cubans-in-the-us-maybe-they-want-to-go-back-theyre-going-to-have-that-choice.html"
])
data=loader.load()
len(data)

2

In [39]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=10
)

docs=text_splitter.split_documents(data)
for doc in data:
    doc.page_content = doc.page_content.replace("\nSave", "")
    doc.page_content = doc.page_content.replace("\nPrint", "")
    doc.page_content = doc.page_content.replace("\nComment", "")
len(docs)

162

In [40]:
import re

for doc in docs:
    doc.page_content = re.sub(r'\s+', ' ', doc.page_content).strip()

In [48]:
print(docs[10].page_content)

Silver Inches Towards Magical $100


In [49]:
len(docs[4].page_content)

262

In [53]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [55]:
# create embeddings object
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# create FAISS vectorstore
vectorstore = FAISS.from_documents(docs, embeddings)

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

MPNetModel LOAD REPORT from: sentence-transformers/all-mpnet-base-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [56]:
file_path="vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorstore, f)

In [60]:
if os.path.exists(file_path):
  with open(file_path,"rb") as f:
    vectorIndex=pickle.load(f)

In [63]:
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorIndex.as_retriever()
)
chain



In [70]:
query="How many cubans did trump deport in the second term?"
langchain.debug=True

chain({"question":query},return_only_outputs=True)

{'answer': "I was able to find relevant information in the sources provided. \n\nFrom the source https://english.elpais.com/international/2026-02-25/trumps-plans-for-cubans-in-the-us-maybe-they-want-to-go-back-theyre-going-to-have-that-choice.html:\n\n- During his second term alone, 1,668 Cubans have been deported to the island, bringing the total number of Cubans deported during his two terms to this point.\n\nHowever, the text does not provide information about the total number of Cubans deported during Trump's two terms. It only mentions the number of deportations during his second term.\n\nFINAL ANSWER: 1,668 Cubans have been deported to the island during Trump's second term.\n",
 'sources': 'https://english.elpais.com/international/2026-02-25/trumps-plans-for-cubans-in-the-us-maybe-they-want-to-go-back-theyre-going-to-have-that-choice.html'}