In [2]:
# Setup: Import required libraries
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# Import LangChain components
from langchain_community.retrievers import ArxivRetriever, WikipediaRetriever, TavilySearchAPIRetriever
from langchain_core.documents import Document
# from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Verify versions
import langchain
print(f"‚úÖ LangChain version: {langchain.__version__}")
print("‚úÖ Setup complete!")

‚úÖ LangChain version: 1.2.0
‚úÖ Setup complete!


In [3]:
# Create an ArxivRetriever instance
# By default, it returns top 3 documents
arxiv_retriever = ArxivRetriever(load_max_docs=3)

# Search for papers on "large language models"
query = "large language models"
docs = arxiv_retriever.invoke(query)

print(f"üìö Found {len(docs)} papers on '{query}'\n")

# Display first paper
print("=" * 80)
print(f"Title: {docs[0].metadata.get('Title', 'N/A')}")
print(f"Authors: {docs[0].metadata.get('Authors', 'N/A')}")
print(f"Published: {docs[0].metadata.get('Published', 'N/A')}")
print(f"\nAbstract (first 500 chars):\n{docs[0].page_content[:500]}...")
print("=" * 80)
print(f"Title: {docs[1].metadata.get('Title', 'N/A')}")
print("=" * 80)
print(f"Title: {docs[2].metadata.get('Title', 'N/A')}")


üìö Found 3 papers on 'large language models'

Title: Large Language Models Lack Understanding of Character Composition of Words
Authors: Andrew Shin, Kunitake Kaneko
Published: 2024-07-23

Abstract (first 500 chars):
Large language models (LLMs) have demonstrated remarkable performances on a wide range of natural language tasks. Yet, LLMs' successes have been largely restricted to tasks concerning words, sentences, or documents, and it remains questionable how much they understand the minimal units of text, namely characters. In this paper, we examine contemporary LLMs regarding their ability to understand character composition of words, and show that most of them fail to reliably carry out even the simple t...
Title: Is Self-knowledge and Action Consistent or Not: Investigating Large Language Model's Personality
Title: Unmasking the Shadows of AI: Investigating Deceptive Capabilities in Large Language Models


In [4]:
# Advanced: Retrieve more documents and explore metadata
arxiv_retriever_advanced = ArxivRetriever(
    load_max_docs=5,  # Get top 5 papers
    load_all_available_meta=True  # Load all metadata
)

# Search for papers on "transformers attention mechanism"
query = "transformers attention mechanism"
docs = arxiv_retriever_advanced.invoke(query)

print(f"üìö Retrieved {len(docs)} papers\n")

# Display metadata for all papers
for i, doc in enumerate(docs, 1):
    print(f"{i}. {doc.metadata.get('Title', 'N/A')}")
    print(f"   Authors: {doc.metadata.get('Authors', 'N/A')}")
    print(f"   Published: {doc.metadata.get('Published', 'N/A')}")
    print(f"   Entry ID: {doc.metadata.get('entry_id', 'N/A')}")
    print()

üìö Retrieved 3 papers

1. Vision Transformer with Quadrangle Attention
   Authors: Qiming Zhang, Jing Zhang, Yufei Xu, Dacheng Tao
   Published: 2023-03-27
   Entry ID: N/A

2. D√©j√† vu: A Contextualized Temporal Attention Mechanism for Sequential Recommendation
   Authors: Jibang Wu, Renqin Cai, Hongning Wang
   Published: 2020-01-29
   Entry ID: N/A

3. Self-Attention as Distributional Projection: A Unified Interpretation of Transformer Architecture
   Authors: Nihal Mehta
   Published: 2025-11-16
   Entry ID: N/A



In [5]:
# Batch processing: Search multiple topics at once
queries = [
    "RAG retrieval augmented generation",
    "vector embeddings",
    "prompt engineering"
]

arxiv_retriever_batch = ArxivRetriever(load_max_docs=3)
batch_results = arxiv_retriever_batch.batch(queries)

print("üìö Batch Search Results:\n")
for query, docs in zip(queries, batch_results):
    print(f"Query: '{query}'")
    print(f"  ‚Üí Found {len(docs)} papers")
    if docs:
        print(f"  ‚Üí Top result: {docs[0].metadata.get('Title', 'N/A')}")
    print()

üìö Batch Search Results:

Query: 'RAG retrieval augmented generation'
  ‚Üí Found 3 papers
  ‚Üí Top result: AR-RAG: Autoregressive Retrieval Augmentation for Image Generation

Query: 'vector embeddings'
  ‚Üí Found 3 papers
  ‚Üí Top result: Part-of-Speech Relevance Weights for Learning Word Embeddings

Query: 'prompt engineering'
  ‚Üí Found 3 papers
  ‚Üí Top result: Towards Goal-oriented Prompt Engineering for Large Language Models: A Survey



In [6]:
# Create a WikipediaRetriever instance
# By default, it returns top 3 documents
wiki_retriever = WikipediaRetriever(top_k_results=2)

# Search for information on "Python programming language"
query = "Python programming language"
docs = wiki_retriever.invoke(query)

print(f"üìñ Found {len(docs)} Wikipedia articles on '{query}'\n")

# Display first result
print("=" * 80)
print(f"Title: {docs[0].metadata.get('title', 'N/A')}")
print(f"Source: {docs[0].metadata.get('source', 'N/A')}")
print(f"\nContent (first 600 chars):\n{docs[0].page_content[:600]}...")
print("=" * 80)

print("=" * 80)
print(f"Title: {docs[1].metadata.get('title', 'N/A')}")
print(f"Source: {docs[1].metadata.get('source', 'N/A')}")
print(f"\nContent (first 600 chars):\n{docs[1].page_content[:600]}...")
print("=" * 80)

üìñ Found 2 Wikipedia articles on 'Python programming language'

Title: Python (programming language)
Source: https://en.wikipedia.org/wiki/Python_(programming_language)

Content (first 600 chars):
Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation. Python is dynamically type-checked and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming.
Guido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language. Python 3.0, released in 2008, was a major revision and not completely backward-compatible with earlier versions. Beginning with Python 3.5, capabi...
Title: Outline of the Python programming language
Source: https://en.wikipedia.org/wiki/Outline_of_the_Python_programming_language

Content (first 600 chars):
The following outline is provided as a

In [7]:
# Advanced: Control number of results and document length
wiki_retriever_advanced = WikipediaRetriever(
    top_k_results=3,        # Get top 3 results
    doc_content_chars_max=1000  # Limit content to 1000 characters per doc
)

# Search for "Machine Learning"
query = "Machine Learning"
docs = wiki_retriever_advanced.invoke(query)

print(f"üìñ Retrieved {len(docs)} Wikipedia articles\n")

# Display all results
for i, doc in enumerate(docs, 1):
    print(f"{i}. Title: {doc.metadata.get('title', 'N/A')}")
    print(f"   Summary: {doc.metadata.get('summary', 'N/A')[:150]}...")
    print(f"   Content length: {len(doc.page_content)} characters")
    print()

üìñ Retrieved 3 Wikipedia articles

1. Title: Machine learning
   Summary: Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn...
   Content length: 1000 characters

2. Title: Neural network (machine learning)
   Summary: In machine learning, a neural network or neural net (NN), also called artificial neural network (ANN), is a computational model inspired by the struct...
   Content length: 1000 characters

3. Title: Attention (machine learning)
   Summary: In machine learning, attention is a method that determines the importance of each component in a sequence relative to the other components in that seq...
   Content length: 1000 characters



In [8]:
# Search in different languages
# Default is English ('en'), but you can specify other languages

# Example: Search in Spanish
wiki_retriever_es = WikipediaRetriever(
    top_k_results=1,
    lang="es"  # Spanish Wikipedia
)

query = "Inteligencia Artificial"
docs = wiki_retriever_es.invoke(query)

print(f"üåê Search in Spanish Wikipedia: '{query}'\n")
print(f"Title: {docs[0].metadata.get('title', 'N/A')}")
print(f"Content preview:\n{docs[0].page_content[:400]}...")

üåê Search in Spanish Wikipedia: 'Inteligencia Artificial'

Title: Inteligencia artificial
Content preview:
La inteligencia artificial, abreviado como IA, en el contexto de las ciencias de la computaci√≥n, es una disciplina y un conjunto de capacidades cognoscitivas e intelectuales expresadas por sistemas inform√°ticos o combinaciones de algoritmos cuyo prop√≥sito es la creaci√≥n de m√°quinas que imiten la inteligencia humana.
Estas tecnolog√≠as permiten que las m√°quinas aprendan de la experiencia, se adapten...


In [9]:
# Batch search for multiple topics
queries = [
    "Albert Einstein",
    "Quantum Computing",
    "Neural Networks"
]

wiki_retriever_batch = WikipediaRetriever(top_k_results=1, doc_content_chars_max=500)
batch_results = wiki_retriever_batch.batch(queries)

print("üìñ Batch Wikipedia Search Results:\n")
for query, docs in zip(queries, batch_results):
    print(f"Query: '{query}'")
    if docs:
        print(f"  ‚Üí Title: {docs[0].metadata.get('title', 'N/A')}")
        print(f"  ‚Üí Summary: {docs[0].page_content[:200]}...")
    print()

üìñ Batch Wikipedia Search Results:

Query: 'Albert Einstein'
  ‚Üí Title: Albert Einstein
  ‚Üí Summary: Albert Einstein (14 March 1879 ‚Äì 18 April 1955) was a German-born theoretical physicist best known for developing the theory of relativity. Einstein also made important contributions to quantum theory...

Query: 'Quantum Computing'
  ‚Üí Title: Quantum computing
  ‚Üí Summary: A quantum computer is a (real or theoretical) computer that exploits superposed and entangled states. Quantum computers can be viewed as sampling from quantum systems that evolve in ways that may be d...

Query: 'Neural Networks'
  ‚Üí Title: Neural network (machine learning)
  ‚Üí Summary: In machine learning, a neural network or neural net (NN), also called artificial neural network (ANN), is a computational model inspired by the structure and functions of biological neural networks.
A...



In [10]:
import os
print(os.getenv("TAVILY_API_KEY"))


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


In [12]:
# Create a TavilySearchAPIRetriever instance
# Make sure TAVILY_API_KEY is set in your .env file

tavily_retriever = TavilySearchAPIRetriever(k=3)  # Return top 3 results

# Search for "latest developments in artificial intelligence 2024"
query = "latest developments in artificial intelligence December 2025"
docs = tavily_retriever.invoke(query)

print(f"üîç Found {len(docs)} web results for '{query}'\n")

# Display first result
print("=" * 80)
print(f"Source: {docs[0].metadata.get('source', 'N/A')}")
print(f"\nContent (first 500 chars):\n{docs[0].page_content[:500]}...")
print("=" * 80)

üîç Found 3 web results for 'latest developments in artificial intelligence December 2025'

Source: https://www.crescendo.ai/news/latest-ai-news-and-updates

Content (first 500 chars):
(AI Breakthrough) Google DeepMind Debuts GenCast for Advanced Weather Forecasting. Date: December 8, 2025. Summary: Google DeepMind has launched GenCast, a new ...Read more...


In [13]:
# Advanced: Control search depth and domain filtering
from langchain_community.retrievers import TavilySearchAPIRetriever

# Advanced configuration
tavily_retriever_advanced = TavilySearchAPIRetriever(
    k=5,  # Return top 5 results
    # search_depth="advanced",  # "basic" or "advanced" (more thorough)
    # include_domains=["github.com", "stackoverflow.com"],  # Filter to specific domains
    # exclude_domains=["example.com"]  # Exclude specific domains
)

# Search for "LangChain tutorials"
query = "LangChain tutorials"
docs = tavily_retriever_advanced.invoke(query)

print(f"üîç Retrieved {len(docs)} web results\n")

# Display all results with sources
for i, doc in enumerate(docs, 1):
    print(f"{i}. Source: {doc.metadata.get('source', 'N/A')}")
    print(f"   Content preview: {doc.page_content[:200]}...")
    print()

üîç Retrieved 5 web results

1. Source: https://github.com/gkamradt/langchain-tutorials
   Content preview: 1. LangChain CookBook Part 1: 7 Core Concepts - Code, Video. 2. LangChain CookBook Part 2: 9 Use Cases - Code, Video. üêí Intermediate = In depth use of LangChain. | ChatPDF | Ashish Talati | üêí Intermed...

2. Source: https://www.datacamp.com/tutorial/how-to-build-llm-applications-with-langchain
   Content preview: Explore the untapped potential of Large Language Models with LangChain, an open-source Python framework for building advanced AI applications. Here, we explore LangChain - An open-source Python framew...

3. Source: https://www.youtube.com/watch?v=nAmC7SoVLd8
   Content preview: LangChain Crash Course For Beginners | LangChain Tutorial
codebasics
1430000 subscribers
12848 likes
559346 views
30 Jun 2023
LangChain is an open-source framework that allows you to build application...

4. Source: https://www.youtube.com/@LangChain
   Content preview: ### The agent devel

In [14]:
# Example: Get current information (news, weather, stock prices, etc.)
from datetime import datetime

current_date = datetime.now().strftime("%B %d, %Y")

# Real-time queries
queries = [
    f"latest AI news {current_date}",
    "current weather in San Francisco",
    "NVIDIA stock price today"
]

tavily_realtime = TavilySearchAPIRetriever(k=2)

print(f"üïê Real-Time Information (as of {current_date}):\n")

for query in queries:
    docs = tavily_realtime.invoke(query)
    print(f"Query: '{query}'")
    if docs:
        print(f"  ‚Üí {docs[0].page_content[:250]}...")
        print(f"  ‚Üí Source: {docs[0].metadata.get('source', 'N/A')}")
    print()

üïê Real-Time Information (as of December 23, 2025):

Query: 'latest AI news December 23, 2025'
  ‚Üí Tech Pulse: December 23, 2025 - AI, Cybersecurity & Development News Roundup ; Lovable (Swedish AI startup): $330M Series B at $6.6B valuation,...
  ‚Üí Source: https://dev.to/krlz/tech-pulse-december-23-2025-ai-cybersecurity-development-news-roundup-1jeh

Query: 'current weather in San Francisco'
  ‚Üí {'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.775, 'lon': -122.4183, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1766476849, 'localtime': '2025-12-23 00:00'}, 'current': {'last_...
  ‚Üí Source: https://www.weatherapi.com/

Query: 'NVIDIA stock price today'
  ‚Üí As of today, NVIDIA(NVDA) shares are valued at $181.37. The company's market cap stands at 4.52T, with a P/E ratio of 44.83 and a dividend yield of 2.2%....
  ‚Üí Source: https://robinhood.com/us/en/stocks/NVDA/

