In [2]:
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from dotenv import load_dotenv
from typing import Literal
import uuid
import os

# Reaching this line means every import above resolved.
print(" All imports successful")

# Read settings from a local .env file (if present) into the process
# environment, then pick up the OpenAI credential from there.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Fail fast: nothing below can work without a key (missing or empty).
if openai_api_key is None or openai_api_key == "":
    raise ValueError("OPENAI_API_KEY not found!")

# Chat model shared by the rest of the notebook.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.3,
    api_key=openai_api_key,
)
print(f" LLM initialized: {llm.model_name}")

 All imports successful
 LLM initialized: gpt-4o-mini


In [3]:
# Python tutorial documents.
#
# Each entry is a dict with exactly two keys:
#   "title":   short topic name, stored later as document metadata
#   "content": the tutorial text that gets chunked and embedded
#
# The text below is the knowledge base the tutor answers from, so factual
# slips here are repeated to the user. Two were corrected relative to the
# earlier revision: the generator section said "three methods" while listing
# four, and asyncio.Queue was described as thread-safe (it is not).
documents_data = [
    {
        "title": "Python List Comprehensions",
        "content": """List comprehensions provide a concise way to create lists. A list comprehension consists of brackets containing an expression followed by a for clause, then zero or more for or if clauses. The result will be a new list resulting from evaluating the expression in the context of the for and if clauses.

The basic syntax is [expression for item in iterable] where the expression is evaluated for each item. You can also add conditions with [expression for item in iterable if condition].

Example: squares = [x**2 for x in range(10)] creates [0, 1, 4, 9, 16, 25, 36, 49, 64, 81].

Nested list comprehensions are possible: matrix = [[i for i in range(3)] for _ in range(3)]. You can also use multiple for clauses: pairs = [(x, y) for x in range(3) for y in range(3)].

List comprehensions are more efficient than using loops and append() for most cases. They are readable and Pythonic. Performance testing shows list comprehensions are typically 20-40% faster than equivalent for loops.

You can also use list comprehensions to flatten nested structures: flattened = [item for sublist in nested_list for item in sublist]. This is equivalent to nested loops but in a single line.

Advanced usage includes ternary operators: [x if x % 2 == 0 else 'odd' for x in range(10)]. This creates conditional values in the list based on each element.

Other comprehension types include dictionary comprehensions: {x: x**2 for x in range(5)}, and set comprehensions: {x**2 for x in range(5)}. These follow similar patterns but produce different data structures."""
    },
    {
        "title": "Python Decorators Guide",
        "content": """Decorators are functions that modify the behavior of other functions or classes. They are a powerful feature of Python that allow you to "wrap" a function with another function to extend its behavior.

The most basic decorator is created by defining a wrapper function inside another function. The decorator uses the @decorator_name syntax. When you apply @decorator_name to a function, Python automatically passes the function to the decorator.

Example: 
```
def my_decorator(func):
    def wrapper():
        print("Something before the function")
        func()
        print("Something after the function")
    return wrapper

@my_decorator
def say_hello():
    print("Hello!")
```

When you call say_hello(), it will print "Something before", then "Hello!", then "Something after".

Decorators with arguments need an additional layer: a decorator that returns a decorator. This allows passing parameters to customize behavior.

Common use cases include:
- Logging: Track when functions are called
- Authentication: Check permissions before executing
- Caching: Store results to avoid recomputation
- Timing: Measure function execution time
- Validation: Check inputs before processing

Python's functools.wraps decorator is important for maintaining metadata of the original function. Without it, the wrapper loses the original function's __name__ and __doc__.

Class decorators work similarly to function decorators. The @dataclass decorator from the dataclasses module is a popular example that automatically generates special methods like __init__ and __repr__.

Chaining decorators is possible: @decorator1 @decorator2 @decorator3 def function(). The decorators are applied bottom-up, so decorator3 is applied first, then decorator2, then decorator1."""
    },
    {
        "title": "Understanding Python Generators",
        "content": """Generators are a way to create iterators using a simple function. A generator function yields values one at a time, pausing execution and resuming from where it left off.

The key keyword is 'yield'. When a function contains yield, calling it doesn't execute the code immediately but returns a generator object. Each time you call next() on the generator, it executes until the next yield.

Example:
```
def count_up(n):
    i = 0
    while i < n:
        yield i
        i += 1

for number in count_up(5):
    print(number)  # Prints 0, 1, 2, 3, 4
```

Generators are memory efficient because they generate values on-the-fly rather than storing all values in memory at once. This makes them ideal for processing large datasets.

Generator expressions are similar to list comprehensions but use parentheses: (x**2 for x in range(10)). This creates a generator instead of a list.

The generator protocol involves four methods:
- next(): Gets the next value from the generator
- send(): Send a value back to the generator
- throw(): Throw an exception into the generator
- close(): Close the generator

Generator functions can be combined with context managers using the contextlib.contextmanager decorator to create context managers from generators.

Common patterns include:
- Reading large files line by line without loading into memory
- Infinite sequences: def infinite_counter(start=0): n = start; while True: yield n; n += 1
- Pipeline processing: combining multiple generators
- Tree traversal: recursively yielding tree nodes

Benefits of generators:
- Reduced memory consumption for large datasets
- Lazy evaluation improves performance
- Cleaner, more readable code for complex iterations
- Better separation of concerns"""
    },
    {
        "title": "Python Context Managers (with statement)",
        "content": """Context managers provide a way to properly manage resources in Python. They are implemented using the 'with' statement and are used for file handling, database connections, locks, and more.

The basic syntax is: with open('file.txt') as f: content = f.read(). The file is automatically closed when exiting the with block, even if an exception occurs.

To create a context manager, implement the __enter__ and __exit__ methods in a class. __enter__ is called when entering the with block and __exit__ is called when exiting, ensuring cleanup happens.

Example:
```
class MyContextManager:
    def __enter__(self):
        print("Entering context")
        return self
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        print("Exiting context")
        return False  # Propagate exceptions
```

The contextlib module provides decorators to create context managers from generators. The @contextmanager decorator allows you to write context managers using yield:

```
from contextlib import contextmanager

@contextmanager
def my_context():
    print("Enter")
    yield
    print("Exit")
```

Common built-in context managers:
- open(): File handling
- threading.Lock(): Thread synchronization
- decimal.localcontext(): Decimal precision settings
- tempfile.TemporaryDirectory(): Temporary file management

Context managers handle three types of exceptions:
1. Exceptions in __enter__: Resource is not acquired
2. Exceptions in with block: __exit__ still runs
3. Exceptions in __exit__: Handled or propagated based on return value

The __exit__ method signature is: __exit__(exc_type, exc_value, traceback). Return True to suppress exceptions, False to propagate.

Best practices:
- Always use context managers for resource management
- Implement proper exception handling in __exit__
- Use contextlib for simple cases
- Document what resources are managed
- Ensure cleanup always happens"""
    },
    {
        "title": "Python Metaclasses Explained",
        "content": """Metaclasses are classes whose instances are classes themselves. They are the most advanced feature in Python and allow you to control how classes are created.

To understand metaclasses, remember that in Python, classes are objects too. Just as instances of a class are created from a class, classes themselves are created from a metaclass. The default metaclass is 'type'.

Every class in Python is an instance of a metaclass: class MyClass: pass is equivalent to class MyClass(metaclass=type): pass.

To create a custom metaclass, subclass type and override the __new__ and/or __init__ methods:

```
class MyMeta(type):
    def __new__(mcs, name, bases, attrs):
        print(f"Creating class {name}")
        return super().__new__(mcs, name, bases, attrs)
```

Then use it: class MyClass(metaclass=MyMeta): pass.

Common use cases for metaclasses:
1. Attribute validation: Ensure all methods are properly decorated
2. Singleton pattern: Create only one instance of a class
3. API frameworks: Automatically register subclasses
4. ORM systems: Database table mapping
5. Plugin systems: Automatic discovery and registration

Example - Singleton metaclass:
```
class SingletonMeta(type):
    _instances = {}
    
    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]

class SingletonClass(metaclass=SingletonMeta):
    pass
```

The Method Resolution Order (MRO) is affected by metaclass hierarchies. Use the mro() method to inspect it.

Metaclasses are powerful but complex. Many problems typically solved with metaclasses can be solved more simply with decorators or descriptors.

Tim Peters quote: "Metaclasses are deeper magic than 99% of users should ever worry about."

Common patterns:
- Auto-registering subclasses
- Adding methods automatically
- Validating class definitions
- Creating domain-specific languages (DSLs)"""
    },
    {
        "title": "Python Async/Await and Asyncio",
        "content": """Async/await is Python's syntax for writing asynchronous code. It allows you to write non-blocking code that looks like synchronous code, making it easier to read and maintain.

Async functions are defined with the 'async def' keyword and can use 'await' to pause execution while waiting for I/O operations without blocking the entire program.

Basic example:
```
import asyncio

async def fetch_data():
    print("Fetching...")
    await asyncio.sleep(1)
    print("Done!")
    return "data"

asyncio.run(fetch_data())
```

The asyncio module provides event loop management. The event loop schedules and executes coroutines. asyncio.run() creates an event loop, runs the coroutine, and closes the loop.

Key concepts:
1. Coroutine: An async function that can be paused with await
2. Task: A coroutine wrapped in a way that schedules it for execution
3. Event Loop: Manages execution of coroutines and callbacks
4. Awaitable: Objects that support the await keyword

Creating concurrent tasks:
```
async def main():
    tasks = [fetch_data() for _ in range(3)]
    results = await asyncio.gather(*tasks)
    return results

asyncio.run(main())
```

asyncio.gather() runs multiple coroutines concurrently and waits for all to complete.

Common patterns:
- asyncio.create_task(): Schedule a coroutine as a task
- asyncio.wait(): Wait for tasks with timeout or for first completion
- async with: Async context managers for resource management
- async for: Async iterators

Important notes:
- Async code doesn't run in parallel (uses single thread)
- I/O operations are truly concurrent (network, disk, etc.)
- CPU-bound tasks need multiprocessing, not asyncio
- Never use blocking operations in async code

Common asyncio functions:
- asyncio.sleep(): Non-blocking sleep
- asyncio.Queue(): Queues for coordinating coroutines (not thread-safe)
- asyncio.Lock(): Synchronization primitives
- asyncio.Semaphore(): Limit concurrent access

Best practices:
- Use asyncio for I/O-bound operations
- Don't mix blocking and async code
- Use libraries that support async (aiohttp, motor, etc.)
- Test thoroughly for race conditions"""
    },
    {
        "title": "Python Type Hints and Static Type Checking",
        "content": """Type hints are optional annotations that specify the expected types of variables, function parameters, and return values. They improve code documentation and enable static type checking.

Basic syntax:
```
def greet(name: str) -> str:
    return f"Hello, {name}!"

x: int = 5
y: str = "hello"
```

Type hints don't enforce types at runtime but help with IDE autocompletion and type checkers like mypy.

Common type hints:
- int, str, float, bool: Built-in types
- List[int], Dict[str, int]: Generic types
- Optional[int]: Can be int or None
- Union[int, str]: Can be either type
- Any: Any type (avoid when possible)
- Callable[[int, str], bool]: Function types

From typing module import these commonly used types:
```
from typing import List, Dict, Optional, Union, Callable, Tuple

def process_items(items: List[str]) -> Dict[str, int]:
    return {item: len(item) for item in items}
```

Python 3.9+ allows using built-in types directly: list[str] instead of List[str].

Type variables allow generic code:
```
from typing import TypeVar

T = TypeVar('T')

def first_element(lst: list[T]) -> T:
    return lst[0]
```

Using mypy for static type checking:
```
$ mypy script.py
```

This catches type errors before runtime. Common mypy flags:
- --strict: Enable all optional checks
- --ignore-missing-imports: Ignore untyped libraries
- --no-implicit-optional: Require explicit Optional

Type aliases improve readability:
```
UserId = int
Coordinates = Tuple[float, float]

def get_user(user_id: UserId) -> dict:
    pass
```

Dataclasses work well with type hints:
```
from dataclasses import dataclass

@dataclass
class Person:
    name: str
    age: int
    email: str = ""
```

Protocols define duck-typing contracts:
```
from typing import Protocol

class Drawable(Protocol):
    def draw(self) -> None: ...
```

Benefits of type hints:
- Better IDE support and autocompletion
- Catch errors earlier in development
- Improve code documentation
- Enable better refactoring tools
- Support for static analysis

Modern Python emphasizes gradual typing - start with hints where most useful and gradually add more."""
    },
    {
        "title": "Python Virtual Environments and Package Management",
        "content": """Virtual environments are isolated Python environments where you can install packages without affecting the system Python or other projects.

Creating a virtual environment:
```
python -m venv myenv
```

Activating it:
```
# On Windows:
myenv\\Scripts\\activate

# On Unix/macOS:
source myenv/bin/activate
```

Once activated, pip installs packages into this isolated environment only.

pip (Package Installer for Python) is the standard tool for installing packages from PyPI (Python Package Index).

Common pip commands:
```
pip install package_name          # Install a package
pip install package==1.2.3        # Install specific version
pip install -r requirements.txt   # Install from file
pip list                          # List installed packages
pip show package_name             # Show package info
pip uninstall package_name        # Uninstall package
pip freeze > requirements.txt     # Export dependencies
```

requirements.txt format:
```
Django==3.2.0
requests>=2.25.0,<3.0.0
numpy~=1.20.0
```

Poetry is a modern alternative to pip offering better dependency management and packaging:
```
poetry init
poetry add package_name
poetry install
```

Poetry manages both dependencies and development dependencies (dev-dependencies) separately.

uv is an even faster replacement for pip written in Rust:
```
pip install uv
uv pip install package_name
```

Best practices:
- Always use virtual environments for projects
- Create requirements.txt for reproducibility
- Pin major versions: package>=1.0,<2.0
- Use semantic versioning
- Keep dependencies minimal
- Regularly update dependencies
- Document Python version requirements

virtual environments should not be committed to version control:
```
# .gitignore
venv/
.venv/
env/
```

Environment variables can be set in activate script for project-specific configuration.

Managing Python versions with pyenv:
```
pyenv install 3.11.0
pyenv local 3.11.0
```

This ensures your project uses the correct Python version.

Troubleshooting:
- "ModuleNotFoundError": Package not installed or wrong environment
- "Permission denied": May need --user flag or proper environment
- Version conflicts: Use requirements.txt to lock versions
- Slow installation: Use --no-cache-dir with pip"""
    }
]

# Wrap every raw entry in a LangChain Document: the tutorial text becomes the
# page content and the title is carried along as metadata.
documents = []
for entry in documents_data:
    documents.append(
        Document(
            page_content=entry["content"],
            metadata={"title": entry["title"]},
        )
    )

# Short summary of what was loaded.
topic_titles = [entry["title"] for entry in documents_data]
print(f" Created {len(documents)} Python tutorial documents")
print(f"   Topics: {', '.join(topic_titles)}")

 Created 8 Python tutorial documents
   Topics: Python List Comprehensions, Python Decorators Guide, Understanding Python Generators, Python Context Managers (with statement), Python Metaclasses Explained, Python Async/Await and Asyncio, Python Type Hints and Static Type Checking, Python Virtual Environments and Package Management


In [4]:
# Split documents into chunks
# Using chunk_size=1000 to balance specificity and context
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", ". ", " ", ""]
)

# split_documents accepts the whole list and splits each document on its own
# (metadata is copied onto every chunk), so no per-document loop is needed.
split_docs = text_splitter.split_documents(documents)

# Without this guard an empty corpus would surface as a ZeroDivisionError in
# the average-size print below, which hides the real cause.
if not split_docs:
    raise ValueError("No chunks were produced - `documents` is empty")

average_chunk_chars = sum(len(doc.page_content) for doc in split_docs) // len(split_docs)
print(f" Split {len(documents)} documents into {len(split_docs)} chunks")
print(f"   Average chunk size: {average_chunk_chars} chars")

# Initialize embeddings using OpenAI
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)
print(" Embeddings initialized with OpenAI text-embedding-3-small")

# Stable, position-based ids make this cell safe to re-run. The in-memory
# Chroma collection outlives a single cell execution, and ids that are not
# supplied are generated randomly, so a second run would otherwise insert a
# second copy of every chunk and retrieval would start returning duplicates.
# With fixed ids the insert overwrites the existing rows instead.
# NOTE(review): if an edit to the corpus yields FEWER chunks than before,
# the surplus old rows stay in the collection until the kernel is restarted.
chunk_ids = [f"python_tutorials-{position:04d}" for position in range(len(split_docs))]

# Create Chroma vector store
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings,
    ids=chunk_ids,
    collection_name="python_tutorials"
)

print(f" Chroma vector store created with {len(split_docs)} chunks")

 Split 8 documents into 22 chunks
   Average chunk size: 763 chars
 Embeddings initialized with OpenAI text-embedding-3-small
 Chroma vector store created with 22 chunks


In [6]:
# Track retrieval statistics: one dict per tool call with the keys
# "query", "num_results" and "sources" (list of document titles).
retrieval_log = []

# Upper bound on how much of each retrieved chunk is handed back to the model.
MAX_SNIPPET_CHARS = 500

# NOTE: the docstring below is not just documentation - the @tool decorator
# sends it to the model as the tool description, so edit it deliberately.
@tool
def retrieve_python_docs(query: str) -> str:
    """
    Retrieve relevant Python documentation for a query using MMR (Maximal Marginal Relevance).
    Use this tool when users ask about Python programming concepts, syntax, or best practices.
    
    Args:
        query: The user's question or topic to search for
        
    Returns:
        Formatted context from the most relevant documentation
    """
    # Use MMR for diverse results
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 3, "lambda_mult": 0.7}
    )

    docs = retriever.invoke(query)
    sources = [doc.metadata.get("title", "Unknown") for doc in docs]

    # Log every call, including those that found nothing; otherwise the
    # statistics silently under-count how often the agent chose to retrieve.
    retrieval_log.append({
        "query": query,
        "num_results": len(docs),
        "sources": sources
    })

    if not docs:
        return "No relevant documentation found."

    # Format results with metadata. The ellipsis is added only when text was
    # really cut, so the model is not told a complete chunk continues.
    sections = []
    for i, (doc, title) in enumerate(zip(docs, sources), 1):
        full_text = doc.page_content
        snippet = full_text[:MAX_SNIPPET_CHARS]
        if len(full_text) > MAX_SNIPPET_CHARS:
            snippet += "..."
        sections.append(f"[{i}] {title}\n{snippet}\n\n")

    return " Retrieved Documentation:\n\n" + "".join(sections)

print(" Retrieval tool created with MMR search")

 Retrieval tool created with MMR search


In [8]:
# Instructions prepended to every model call: the tutor persona plus the rule
# for when the retrieval tool should (and should not) be used.
TUTOR_INSTRUCTIONS = """You are an expert Python programming tutor. Your role is to:
1. Answer questions about Python programming concepts
2. Use the retrieve_python_docs tool to get accurate information when needed
3. Only retrieve documents for Python-specific questions
4. For general knowledge (math, history, etc.), answer without retrieval
5. Provide clear, educational explanations with examples
6. Always cite the documentation source when retrieved

Decision logic:
- Retrieve if: Question is about Python syntax, libraries, patterns, or best practices
- Don't retrieve if: Question is general knowledge, greeting, or not about programming

Provide helpful, well-structured responses."""

sys_msg = SystemMessage(content=TUTOR_INSTRUCTIONS)

# The tools the model is allowed to call, and a model handle that has their
# schemas attached so it can emit tool calls.
tools = [retrieve_python_docs]
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState) -> dict:
    """LLM node: answer directly or request a documentation lookup.

    Sends the system prompt followed by the conversation so far to the
    tool-enabled model and returns its reply as a one-element message update.
    """
    conversation = [sys_msg, *state["messages"]]
    reply = llm_with_tools.invoke(conversation)
    return {"messages": [reply]}

def should_continue(state: MessagesState) -> Literal["tools", "__end__"]:
    """Route after the assistant node.

    Returns "tools" when the newest message carries tool calls that still
    have to be executed, otherwise "__end__" to finish the turn.
    """
    pending_calls = state["messages"][-1].tool_calls
    return "tools" if pending_calls else "__end__"

# Assemble the agent loop: START -> assistant -> (tools -> assistant)* -> END
builder = StateGraph(MessagesState)

# Nodes: the LLM step and the executor for whatever tool calls it emits.
tool_node = ToolNode(tools)
builder.add_node("assistant", assistant)
builder.add_node("tools", tool_node)

# Edges: always start at the assistant; after each assistant step let
# should_continue pick between running tools and ending; tool results always
# go back to the assistant.
route_targets = {"tools": "tools", "__end__": END}
builder.add_edge(START, "assistant")
builder.add_conditional_edges("assistant", should_continue, route_targets)
builder.add_edge("tools", "assistant")

# Compile with an in-memory checkpointer so conversation state is kept per
# thread_id between invocations.
memory = MemorySaver()
rag_agent = builder.compile(checkpointer=memory)

print(" Agentic RAG graph built and compiled")

 Agentic RAG graph built and compiled


In [11]:
# Create session
thread_id = f"rag-session-{uuid.uuid4().hex[:8]}"
query_log = []

# Suggested test queries (type 'exit' at the prompt to end the session).
#   Queries NEEDING retrieval:
#     1. How do Python list comprehensions work?
#     2. What is a context manager and when should I use it?
#     3. Explain async/await in Python
#     4. How do decorators work in Python?
#     5. What are generators?
#   Queries NOT needing retrieval:
#     6. What is the capital of France?
#     7. What is 2+2?
#     8. Tell me a joke
#     9. What year was the internet invented?
#     10. Who was the first president of the USA?


def _summarize_latest_turn(messages):
    """Return ``(final_answer, used_retrieval)`` for the most recent turn only.

    The graph is compiled with a checkpointer, so the state returned by
    ``invoke`` holds the WHOLE thread, not just the current exchange. Scanning
    it from the start would show the first turn's answer forever and would
    report retrieval on every turn after the first one that used it. The
    current turn is therefore taken as everything after the last HumanMessage.

    Args:
        messages: full message list from the agent state.

    Returns:
        A tuple of the last AIMessage in this turn that carries no tool calls
        (``None`` if there is none) and a bool telling whether any AIMessage
        in this turn requested a tool call.
    """
    turn_start = 0
    for index in range(len(messages) - 1, -1, -1):
        if isinstance(messages[index], HumanMessage):
            turn_start = index + 1
            break

    ai_replies = [m for m in messages[turn_start:] if isinstance(m, AIMessage)]
    used_retrieval = any(m.tool_calls for m in ai_replies)
    final_answer = next((m for m in reversed(ai_replies) if not m.tool_calls), None)
    return final_answer, used_retrieval


turn = 1
while True:
    try:
        user_input = input(f"👤 You (Turn {turn}): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print("\n Session ended")
        break

    if user_input.lower() == "exit":
        print("\n Session ended")
        break

    if not user_input:
        print(" Please enter a query\n")
        continue

    try:
        # Invoke agent; the thread_id ties this call to the stored history.
        result = rag_agent.invoke(
            {"messages": [HumanMessage(content=user_input)]},
            config={"configurable": {"thread_id": thread_id}}
        )

        final_answer, used_retrieval = _summarize_latest_turn(result["messages"])

        # Display response
        if final_answer is not None:
            response_text = final_answer.content
            print(f"\n Agent: {response_text}\n")
            print(f" Used retrieval: {'Yes ' if used_retrieval else 'No '}\n")
            query_log.append({
                "query": user_input,
                "retrieved": used_retrieval,
                "turn": turn
            })

        print("-" * 80 + "\n")
        turn += 1

    except Exception as e:
        # Keep the session alive on API/tool errors; the turn counter is not
        # advanced so the user can simply retry.
        print(f"\n Error: {e}\n")
        print("-" * 80 + "\n")

 AGENTIC RAG SYSTEM - PYTHON PROGRAMMING TUTOR

Thread ID: rag-session-2facca49

 Test these queries:
   Queries NEEDING retrieval (5):
   1. How do Python list comprehensions work?
   2. What is a context manager and when should I use it?
   3. Explain async/await in Python
   4. How do decorators work in Python?
   5. What are generators?

   Queries NOT needing retrieval (5):
   6. What is the capital of France?
   7. What is 2+2?
   8. Tell me a joke
   9. What year was the internet invented?
   10. Who was the first president of the USA?

Type 'exit' to end

--------------------------------------------------------------------------------


 Agent: Generators are a special type of iterable in Python that allow you to iterate through a sequence of values without storing the entire sequence in memory at once. They are defined using functions and the `yield` statement, which allows the function to return a value and pause its execution, maintaining its state for the next call.

### Ke

In [13]:
# Analyze results: summarize what the chat session recorded in query_log
# (one entry per answered turn) and retrieval_log (one entry per tool call).
print("\n" + "=" * 80)
print(" TEST RESULTS & ANALYSIS")
print("=" * 80)

PREVIEW_CHARS = 60  # longest query text shown in the breakdown

if query_log:
    retrieved_count = sum(1 for q in query_log if q["retrieved"])
    print(f"\nTotal queries: {len(query_log)}")
    print(f"Retrieval used: {retrieved_count} times")
    print(f"No retrieval: {len(query_log) - retrieved_count} times")

    print("\n Query Breakdown:")
    print("-" * 80)
    for i, q in enumerate(query_log, 1):
        status = " Retrieved" if q["retrieved"] else "No retrieval"
        query_text = q["query"]
        # Add the ellipsis only when the query was actually shortened;
        # appending it unconditionally made short queries look truncated.
        if len(query_text) > PREVIEW_CHARS:
            preview = query_text[:PREVIEW_CHARS] + "..."
        else:
            preview = query_text
        print(f"{i}. {preview} [{status}]")
else:
    print("\nNo queries were made in this session")

# Display retrieval log
print("\n" + "=" * 80)
print(" RETRIEVAL LOG")
print("=" * 80)

if retrieval_log:
    for i, entry in enumerate(retrieval_log, 1):
        print(f"\nRetrieval #{i}:")
        print(f"  Query: {entry['query']}")
        print(f"  Results: {entry['num_results']}")
        # An entry may carry no sources; show that explicitly, not as a blank.
        print(f"  Sources: {', '.join(entry['sources']) or '(none)'}")
else:
    print("\nNo retrievals were performed")

print("\n" + "=" * 80)


 TEST RESULTS & ANALYSIS

Total queries: 1
Retrieval used: 0 times
No retrieval: 1 times

 Query Breakdown:
--------------------------------------------------------------------------------
1. What are generators?... [No retrieval]

 RETRIEVAL LOG

No retrievals were performed

