In [10]:
# Cell 1: Environment Setup and Imports
import os
from dotenv import load_dotenv

# Read variables from the .env file, then look up the Hugging Face token
# (the lookup yields None when the variable is not set).
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")
# Print only a presence flag so the secret itself never reaches the output.
print("HF_TOKEN loaded:", True if HF_TOKEN else False)

HF_TOKEN loaded: True


In [11]:
# Cell 2: Wikipedia Tool Setup
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun

# Build the Wikipedia tool: one result per query, content capped at 150 chars.
wiki_api = WikipediaAPIWrapper(
    doc_content_chars_max=150,
    top_k_results=1,
)
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api)
print("Wikipedia tool ready:", wiki_tool.name)

# Smoke test: run one lookup and show the first 100 characters of the answer.
# A failure is reported but does not stop the notebook.
try:
    test_result = wiki_tool.run("Python programming language")
    print("Wikipedia test successful:", "".join((test_result[:100], "...")))
except Exception as wiki_error:
    print("Wikipedia tool error:", wiki_error)


Wikipedia tool ready: wikipedia
Wikipedia test successful: Page: Python (programming language)
Summary: Python is a high-level, general-purpose programming lan...


In [12]:
# Cell 3: Web Retriever Setup
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

try:
    # Step 1: download the page as LangChain documents.
    print("Loading web page...")
    loader = WebBaseLoader("https://docs.smith.langchain.com/")
    docs = loader.load()
    print(f"Loaded {len(docs)} documents")

    # Step 2: split into deliberately small chunks (size 200, overlap 20)
    # to stay clear of token limits downstream.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=20,
        chunk_size=200,
    )
    chunks = text_splitter.split_documents(docs)
    print(f"Created {len(chunks)} chunks")

    # Step 3: sentence-transformers embedding model.
    print("Initializing embeddings...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

    # Step 4: index only the first five chunks in FAISS and expose a retriever.
    print("Creating vector store...")
    vectordb = FAISS.from_documents(chunks[0:5], embeddings)
    retriever = vectordb.as_retriever()

    print("Web retriever setup complete")
    retriever_available = True

except Exception as setup_error:
    # Any failure above downgrades to a warning; later cells check the flag.
    print(f"Warning: Could not set up retriever: {setup_error}")
    retriever_available = False


Loading web page...
Loaded 1 documents
Created 10 chunks
Initializing embeddings...
Creating vector store...
Web retriever setup complete


In [35]:
# Cell 4: Create Retriever Tool (Fixed)
from langchain.tools import Tool

if retriever_available:
    try:
        def retriever_search(query: str) -> str:
            """Look up ``query`` in the web retriever and return plain text.

            Args:
                query: Free-text search string.

            Returns:
                The ``page_content`` of every document the retriever returns,
                joined with blank lines (an empty string when nothing matches).
            """
            # `invoke` is used instead of `get_relevant_documents`, which is
            # deprecated and emits a warning on every call.
            matches = retriever.invoke(query)
            return "\n\n".join(doc.page_content for doc in matches)

        # Wrap the search function so the agent can call it like any other tool.
        retriever_tool = Tool(
            name="langsmith_search",
            func=retriever_search,
            description="Search for information about LangSmith. Returns text from docs."
        )

        print("Retriever tool ready:", retriever_tool.name)

        # Smoke test: run one query and preview the first 200 characters.
        test_result = retriever_tool.run("What is LangSmith?")
        print("Retriever test successful:", test_result[:200] + "...")

    except Exception as e:
        # Keep the notebook running; downstream cells treat None as "no tool".
        print(f"Warning: Could not create retriever tool: {e}")
        retriever_tool = None
else:
    retriever_tool = None
    print("Retriever tool not available")

Retriever tool ready: langsmith_search


  docs = retriever.get_relevant_documents(query)


Retriever test successful: started with LangSmithCopy pageCopy pageLangSmith is a platform for building production-grade LLM applications. Monitor and evaluate your application, so you can ship quickly and with confidence.

Get...


In [36]:
# Cell 5: Arxiv Tool Setup
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

try:
    # One paper per query, with the returned content capped at 150 characters.
    arxiv_wrapper = ArxivAPIWrapper(
        doc_content_chars_max=150,
        top_k_results=1,
    )
    arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_wrapper)
    print("Arxiv tool ready:", arxiv_tool.name)

    # Smoke test: run one search and preview the first 100 characters.
    test_result = arxiv_tool.run("machine learning")
    print("Arxiv test successful:", "".join((test_result[:100], "...")))

except Exception as arxiv_error:
    # On any failure the tool is set to None so later cells can skip it.
    print(f"Warning: Could not set up arxiv tool: {arxiv_error}")
    arxiv_tool = None


Arxiv tool ready: arxiv
Arxiv test successful: Published: 2019-09-08
Title: Lecture Notes: Optimization for Machine Learning
Authors: Elad Hazan
Su...


In [37]:
# ✅ Cell 6: LLM Setup (Fixed Version)
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline

try:
    print("Initializing language model...")

    # FLAN-T5 base as a text2text pipeline on CPU (device=-1); max_length=512
    # is set to avoid "sequence too long" errors.
    hf_pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        device=-1,
        max_length=512,
    )

    # NOTE(review): these kwargs are attached to the LangChain wrapper; confirm
    # they actually reach generation when a ready-made pipeline is passed in.
    llm = HuggingFacePipeline(
        pipeline=hf_pipe,
        model_kwargs={"max_length": 200, "temperature": 0.7, "do_sample": True},
    )
    print("LLM initialized successfully")

    # Smoke test: one prompt, preview of the first 200 characters.
    test_response = llm.invoke("What is artificial intelligence?")
    print("LLM test successful:", "".join((test_response[:200], "...")))

except Exception as llm_error:
    print(f"Error initializing LLM: {llm_error}")
    # Fall back to a canned-response LLM so the rest of the notebook still runs.
    from langchain_community.llms.fake import FakeListLLM
    llm = FakeListLLM(
        responses=[
            "I'm a simplified language model. I can help with basic queries.",
            "Please try rephrasing your question.",
            "I'm processing your request...",
        ]
    )
    print("Using fallback LLM")

Initializing language model...


Device set to use cpu


LLM initialized successfully
LLM test successful: Artificial Intelligence (AI) is the development of artificial intelligence (AI) technology....


In [38]:
# Cell 7: Agent Setup
from langchain import hub
from langchain.agents import initialize_agent, AgentType, AgentExecutor

# Collect every tool that an earlier cell managed to create. A tool that was
# never defined, or was set to None after a failed setup, is skipped.
available_tools = [
    globals()[tool_name]
    for tool_name in ("wiki_tool", "arxiv_tool", "retriever_tool")
    if globals().get(tool_name)
]

print(f"Available tools: {[tool.name for tool in available_tools]}")

if available_tools:
    try:
        print("Creating agent...")

        # initialize_agent builds the zero-shot ReAct agent and its executor in
        # one step, so no separate agent or hub prompt is constructed here.
        # (Calling a never-imported `create_react_agent` would raise NameError
        # on a fresh kernel and leave the notebook without an agent.)
        agent_executor = initialize_agent(
            tools=available_tools,
            llm=llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            handle_parsing_errors=True,  # feed format errors back to the LLM
            max_iterations=2,            # stop after two reasoning steps
            max_execution_time=30,       # time cap for one query
        )

        print("Agent created successfully")
        agent_available = True

    except Exception as e:
        # Later cells check `agent_available` before touching `agent_executor`.
        print(f"Could not create agent: {e}")
        agent_available = False
else:
    print("No tools available for agent")
    agent_available = False


Available tools: ['wikipedia', 'arxiv', 'langsmith_search']
Creating agent...
Agent created successfully


In [39]:
# Cell 8: Query Functions
def simple_query(question):
    """Send ``question`` to the agent, falling back to single tools on error.

    Prints a banner with the question first. If the agent is unavailable a
    fixed message is returned. If the agent call raises, the retriever tool is
    tried for questions mentioning "langsmith", then the Wikipedia tool for
    "who is" / "what is" / "tell me about" questions.

    Args:
        question: The user's question as a string.

    Returns:
        The agent's ``output`` value, a prefixed tool result, or an error
        message string.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Query: {question}")
    print(banner)

    if not agent_available:
        return "Agent not available. Please check the setup."

    try:
        agent_reply = agent_executor.invoke({"input": question})
        return agent_reply.get('output', 'No response generated')

    except Exception as agent_error:
        error_msg = str(agent_error)
        print(f"Agent query failed: {error_msg[:200]}")
        print("Trying direct tool fallback...")

        lowered = question.lower()

        # First fallback: the docs retriever, for LangSmith questions only.
        if "langsmith" in lowered and retriever_tool:
            try:
                return f"(Via retriever tool) {retriever_tool.run(question)}"
            except Exception as tool_error:
                print(f"Retriever tool failed: {tool_error}")

        # Second fallback: Wikipedia, for definition-style questions.
        wiki_triggers = ('who is', 'what is', 'tell me about')
        if wiki_tool and any(phrase in lowered for phrase in wiki_triggers):
            try:
                return f"(Via Wikipedia) {wiki_tool.run(question)}"
            except Exception as tool_error:
                print(f"Wikipedia tool failed: {tool_error}")

        # Nothing worked: report a shortened version of the agent error.
        return f"Query failed: {error_msg[:100]}..."

def test_individual_tools():
    """Test each tool individually"""
    print("\n" + "="*60)
    print("TESTING INDIVIDUAL TOOLS")
    print("="*60)
    
    # Test Wikipedia
    if wiki_tool:
        try:
            result = wiki_tool.run("Machine Learning")
            print("✅ Wikipedia tool working:", result[:100] + "...")
        except Exception as e:
            print("❌ Wikipedia tool failed:", e)
    
    # Test Arxiv
    if arxiv_tool:
        try:
            result = arxiv_tool.run("transformer model")
            print("✅ Arxiv tool working:", result[:100] + "...")
        except Exception as e:
            print("❌ Arxiv tool failed:", e)
    
    # Test Retriever
    if retriever_tool:
        try:
            result = retriever_tool.run("LangSmith features")
            print("✅ Retriever tool working:", result[:100] + "...")
        except Exception as e:
            print("❌ Retriever tool failed:", e)


In [40]:
# Cell 9: Test Queries
# Banner, then a per-tool smoke test, then (if an agent exists) three sample
# questions routed through simple_query.
print("="*60, "RAG AGENT SETUP COMPLETE", "="*60, sep="\n")

test_individual_tools()

if not agent_available:
    print("\n❌ Agent not available. Only individual tool testing completed.")
else:
    print("\n" + "="*60, "TESTING AGENT QUERIES", "="*60, sep="\n")

    test_queries = [
        "What is machine learning?",
        "Tell me about LangSmith",
        "Who is Albert Einstein?",
    ]

    for query in test_queries:
        # A failing query is reported and the loop moves on to the next one.
        try:
            result = simple_query(query)
            print(f"Result: {result}\n")
        except Exception as query_error:
            print(f"Query failed: {query_error}\n")

print("Setup and testing complete!")


RAG AGENT SETUP COMPLETE

TESTING INDIVIDUAL TOOLS
✅ Wikipedia tool working: Page: Machine learning
Summary: Machine learning (ML) is a field of study in artificial intelligence...
✅ Arxiv tool working: Published: 2012-10-23
Title: Model Validation in Ontology Based Transformations
Authors: Jesús M. Al...
✅ Retriever tool working: Get started with LangSmith - Docs by LangChainOur new LangChain Academy course on Deep Agents is now...

TESTING AGENT QUERIES

Query: What is machine learning?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mMachine learning is a type of computer science.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mMachine learning is a type of computer science.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3m[0m

[1m> Finished chain.[0m
Result: Agent stopped due to iteration limit or time limit.


Query: Tell me about LangSmith


[1m> Entering new AgentExecutor chain...[0m


In [41]:
# Cell 10: Interactive Query Cell (Optional)
# Run this cell to ask custom questions
def ask_question(question):
    """Answer ``question`` with the agent, or with a single tool if there is none.

    When ``agent_available`` is true the question is delegated to
    ``simple_query``. Otherwise the tools are tried in a fixed order, each one
    only if it exists and the lower-cased question contains one of its
    keywords:

    * Wikipedia: "who", "what", "tell me"
    * Arxiv: "paper", "research", "study"
    * Retriever: "langsmith"

    A tool that raises is reported on stdout and the next candidate is tried,
    so one broken tool does not hide the others. Only ``Exception`` subclasses
    are handled; ``KeyboardInterrupt`` and ``SystemExit`` propagate.

    Args:
        question: The user's question as a string.

    Returns:
        The agent's answer, a tool result prefixed with the tool's label, or
        "No suitable tool found for this question." when nothing applies.
    """
    if agent_available:
        return simple_query(question)

    print("Agent not available. Trying individual tools...")
    lowered = question.lower()

    # (label, tool, keywords): order defines fallback priority.
    candidates = (
        ("Wikipedia", wiki_tool, ('who', 'what', 'tell me')),
        ("Arxiv", arxiv_tool, ('paper', 'research', 'study')),
        ("Retriever", retriever_tool, ('langsmith',)),
    )

    for label, tool, keywords in candidates:
        if not tool or not any(word in lowered for word in keywords):
            continue
        try:
            return f"({label}) {tool.run(question)}"
        except Exception as tool_error:
            # Report the failure instead of hiding it, then keep going.
            print(f"{label} tool failed: {tool_error}")

    return "No suitable tool found for this question."


In [42]:
# Ad-hoc query: pass one LangSmith question to ask_question and print
# whatever string it returns.
result = ask_question("Tell me about Langsmith")
print(result)


Query: Tell me about Langsmith


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLangsmith is a fictional character in the science fiction television series LangSmith. Langsmith is a fictional character in the science fiction television series LangSmith.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mLangsmith is a fictional character in the science fiction television series LangSmith. Langsmith is a fictional character in the science fiction television series LangSmith.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3m[0m

[1m> Finished chain.[0m
Agent stopped due to iteration limit or time limit.


In [43]:
# Ad-hoc query: ask about a paper by its identifier and print the returned
# string.
result = ask_question("What's the paper 1605.08386 about?")
print(result)


Query: What's the paper 1605.08386 about?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe paper 1605.08386 is about a scientist.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3mThe paper 1605.08386 is about a scientist.[0m
Observation: Invalid Format: Missing 'Action:' after 'Thought:
Thought:[32;1m[1;3m[0m

[1m> Finished chain.[0m
Agent stopped due to iteration limit or time limit.
