In [5]:
! pip install langchain_community tiktoken langchain_google_genai langchain-openai langchainhub chromadb langchain

Collecting langchain_google_genai
  Using cached langchain_google_genai-2.1.8-py3-none-any.whl.metadata (7.0 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Using cached filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain_google_genai)
  Using cached google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0,>=1.34.1 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0,>=1.34.1->google-ai-generativelanguage<0.7.0,>=0.6.18->langchain_google_genai)
  Using cached google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)
Collecting proto-plus<2.0.0,>=1.22.3 (from google-ai-generativelanguage<0.7.0,>=0.6.18->langchain_google_genai)
  Using cached proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 k

In [6]:
import os
from getpass import getpass

# LangSmith tracing configuration.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# SECURITY: never store API keys in notebook source. A key was previously
# hardcoded in this cell; treat it as leaked and revoke/rotate it.
# Reuse LANGCHAIN_API_KEY when it is already exported, otherwise prompt for it
# so the secret never appears in the saved notebook.
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass('LangSmith API key: ')

In [12]:
import os
import requests
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Set your Google API key
# SECURITY: never store API keys in notebook source. A key was previously
# hardcoded on this line; treat it as leaked and revoke/rotate it.
# Reuse GOOGLE_API_KEY when it is already exported, otherwise prompt for it so
# the secret never appears in the saved notebook.
from getpass import getpass

if not os.environ.get('GOOGLE_API_KEY'):
    os.environ['GOOGLE_API_KEY'] = getpass('Google API key: ')

def load_web_content_manually(url, timeout=30):
    """Fetch a page and return the text of its main content area.

    The CSS selectors in ``content_selectors`` are tried in order and the first
    one that yields non-empty text wins. If none does, the text of ``<body>``
    is used instead.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The extracted text, or an empty string when no usable content exists.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    print(f"Fetching content from {url}")
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of indexing an error page's text.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content, 'html.parser')

    # Try multiple selectors to find the main content
    content_selectors = [
        'article',
        '.post-content',
        '.content',
        'main',
        '#main-content',
        '.entry-content'
    ]

    content = ""
    for selector in content_selectors:
        elements = soup.select(selector)
        if not elements:
            continue
        # A separator keeps words from adjacent elements from being fused
        # together (e.g. a heading running straight into the next paragraph).
        text = elements[0].get_text(separator=" ", strip=True)
        if text:
            content = text
            print(f"Found content with selector '{selector}': {len(content)} characters")
            break

    # If no specific selector works, get all text from body
    if not content:
        body = soup.find('body')
        if body:
            content = body.get_text(separator=" ", strip=True)
            print(f"Using body content: {len(content)} characters")

    return content

#### INDEXING ####

# Single source of truth for the article being indexed.
SOURCE_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
# Heuristics used to decide whether a loader returned the real article body.
MIN_CONTENT_CHARS = 10000
EXPECTED_PHRASE = "task decomposition"


def _load_manually_as_docs(url):
    """Scrape ``url`` with load_web_content_manually and wrap the text in a one-element Document list."""
    content = load_web_content_manually(url)
    return [Document(page_content=content, metadata={"source": url})]


print("Loading documents manually...")
try:
    # Method 1: Try WebBaseLoader with no restrictions first
    loader = WebBaseLoader(SOURCE_URL)
    docs = loader.load()

    # If content seems too short or wrong, try manual method
    page_text = docs[0].page_content
    if len(page_text) < MIN_CONTENT_CHARS or EXPECTED_PHRASE not in page_text.lower():
        print("WebBaseLoader didn't get good content, trying manual approach...")
        docs = _load_manually_as_docs(SOURCE_URL)

except Exception as e:
    print(f"WebBaseLoader failed: {e}")
    print("Falling back to manual loading...")
    docs = _load_manually_as_docs(SOURCE_URL)

# Stop here with a clear message rather than indexing an empty document.
if not docs or not docs[0].page_content.strip():
    raise ValueError(f"No content could be loaded from {SOURCE_URL}")

print(f"Final content length: {len(docs[0].page_content)} characters")
print(f"Content preview:\n{docs[0].page_content[:1000]}...")

# Verify we have the right content
if EXPECTED_PHRASE in docs[0].page_content.lower():
    print("✅ Content contains 'task decomposition'")
else:
    print("❌ Content does not contain 'task decomposition'")

# Split the loaded documents into overlapping chunks for embedding.
print("Splitting documents...")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} chunks")

# Sanity check: collect (index, chunk) pairs whose text mentions the phrase.
task_decomp_chunks = [
    (position, chunk)
    for position, chunk in enumerate(splits)
    if "task decomposition" in chunk.page_content.lower()
]

print(f"Found {len(task_decomp_chunks)} chunks containing 'task decomposition'")
if task_decomp_chunks:
    first_match = task_decomp_chunks[0][1]
    print("Sample task decomposition chunk:")
    print(first_match.page_content[:500] + "...")

# Embed the chunks and index them in a Chroma vector store.
print("Creating embeddings...")
embedding_model = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")  # Updated embedding model
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# The retriever is configured with k=4 for each similarity search.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

#### RETRIEVAL and GENERATION ####

# Custom prompt optimized for Gemini.
# Built from adjacent string literals; the resulting text is identical to the
# original triple-quoted template, including the blank lines between sections.
custom_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer:"
)

prompt = PromptTemplate.from_template(custom_prompt)

# LLM: temperature=0 is passed to the Gemini chat model.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", temperature=0)

# Post-processing
def format_docs(docs):
    """Return the ``page_content`` of every document joined by blank lines."""
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)

# Chain
# The incoming question feeds two branches: one pipes it through the retriever
# and format_docs to build the context, the other passes it through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Test the system
section_break = "\n" + "=" * 50

# First inspect what the retriever returns on its own.
print(section_break)
print("Testing retrieval...")
test_question = "What is Self-Reflection?"
retrieved_docs = retriever.invoke(test_question)
print(f"Retrieved {len(retrieved_docs)} documents")

print("Sample retrieved content:")
for position, doc in enumerate(retrieved_docs[:2], start=1):  # Show first 2
    print(f"\nDoc {position}: {doc.page_content[:400]}...")

# Then run the same question through the full chain.
print(section_break)
print("Running RAG chain...")
result = rag_chain.invoke(test_question)
print(f"Result: {result}")

# Test with another question
print(section_break)
alt_result = rag_chain.invoke("What are the components of an agent system?")
print(f"Alternative question result: {alt_result}")

Loading documents manually...
Final content length: 43801 characters
Content preview:






LLM Powered Autonomous Agents | Lil'Log







































Lil'Log

















|






Posts




Archive




Search




Tags




FAQ









      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


 


Table of Contents



Agent System Overview

Component One: Planning

Task Decomposition

Self-Reflection


Component Two: Memory

Types of Memory

Maximum Inner Product Search (MIPS)


Component Three: Tool Use

Case Studies

Scientific Discovery Agent

Generative Agents Simulation

Proof-of-Concept Examples


Challenges

Citation

References





Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies,

In [13]:
# Documents
# Toy question/document pair used by the token-counting example below.
question: str = "What kinds of pets do I like?"
document: str = "My favorite pet is a cat."

In [14]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens ``string`` encodes to under the named tiktoken encoding."""
    token_ids = tiktoken.get_encoding(encoding_name).encode(string)
    return len(token_ids)

num_tokens_from_string(question, "cl100k_base")


8

In [15]:
# import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Set your Google API key
# os.environ['GOOGLE_API_KEY'] = '<your-google-api-key>'

# Migrated from OpenAI (`from langchain_openai import OpenAIEmbeddings`,
# `embd = OpenAIEmbeddings()`) to Google Gemini embeddings:
embd = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

question = "What is Task Decomposition?"
document = "Task decomposition is a technique where complex tasks are broken down into smaller, manageable subtasks."

# Embed the query with embed_query and the document with embed_documents.
# NOTE(review): GoogleGenerativeAIEmbeddings is understood to apply a
# query-oriented task type in embed_query and a document-oriented one in
# embed_documents, so embedding a document via embed_query would put it in the
# wrong role for retrieval - confirm against the installed package version.
query_result = embd.embed_query(question)
document_result = embd.embed_documents([document])[0]

print(f"Query embedding length: {len(query_result)}")
print(f"Document embedding length: {len(document_result)}")
print(f"Query embedding sample (first 5 values): {query_result[:5]}")

# Additional methods available: embed_documents embeds a whole list in one call.
documents = [
    "Task decomposition breaks complex tasks into smaller parts.",
    "LLM agents use reasoning to solve problems.",
    "Autonomous agents can plan and execute tasks.",
]

# Embed multiple documents at once
doc_embeddings = embd.embed_documents(documents)
embedding_count = len(doc_embeddings)
embedding_dim = len(doc_embeddings[0])
print(f"Number of document embeddings: {embedding_count}")
print(f"Each embedding has {embedding_dim} dimensions")

# Calculate similarity between query and documents (optional)
import numpy as np

def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors.

    Args:
        vec1: 1-D sequence or array of numbers.
        vec2: 1-D sequence or array of numbers with the same length as ``vec1``.

    Returns:
        The cosine of the angle between the two vectors as a Python ``float``.
        Returns ``0.0`` when either vector has zero magnitude: the angle is
        undefined there and the plain formula would divide by zero and give NaN.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)

print("\nSimilarity scores:")
PREVIEW_CHARS = 50  # maximum number of characters of each document to show
for position, (text, doc_emb) in enumerate(zip(documents, doc_embeddings), start=1):
    similarity = cosine_similarity(query_result, doc_emb)
    # Only add an ellipsis when the text was actually cut; previously "..." was
    # appended to short documents too, which made them look truncated.
    preview = text if len(text) <= PREVIEW_CHARS else text[:PREVIEW_CHARS] + "..."
    print(f"Document {position}: {similarity:.4f} - '{preview}'")

Query embedding length: 768
Document embedding length: 768
Query embedding sample (first 5 values): [0.00200876547023654, 0.01027664914727211, -0.010844051837921143, -0.02176510915160179, -0.00010687056055758148]
Number of document embeddings: 3
Each embedding has 768 dimensions

Similarity scores:
Document 1: 0.9360 - 'Task decomposition breaks complex tasks into small...'
Document 2: 0.6988 - 'LLM agents use reasoning to solve problems....'
Document 3: 0.7480 - 'Autonomous agents can plan and execute tasks....'
