# Project: Build a Basic AI-Powered FAQ Bot

# PydanticAI FAQ Bot - Module 1 Project
# AI-Powered FAQ Bot using Ollama LLM and Embedding Models

# SETUP AND INSTALLATIONS


In [None]:
# Install Ollama in Colab
# (IPython shell escape: downloads the official install script and pipes it to sh)
!curl -fsSL https://ollama.com/install.sh | sh

# Start Ollama server in background
import subprocess
import time
import os  # NOTE(review): not referenced anywhere in this cell

# Start Ollama server
# Popen returns immediately, so the server keeps running as a child process
# while the notebook continues; its stdout/stderr are discarded.
print("🔄 Starting Ollama server...")
ollama_process = subprocess.Popen(['ollama', 'serve'],
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)

# Wait for server to start
# NOTE(review): this is a fixed 5-second pause; the success message below is
# printed without checking that the server is actually accepting requests.
time.sleep(5)
print("✅ Ollama server started!")

# Install required packages
# (-q keeps pip's output short)
!pip install -q requests beautifulsoup4 numpy scikit-learn ollama python-dotenv
!pip install -q sentence-transformers chromadb

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
🔄 Starting Ollama server...
✅ Ollama server started!
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m105.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m17.7 MB/s[0m eta [36m0:00:

# IMPORT LIBRARIES

In [None]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import json
import re
from typing import List, Dict, Any
from sklearn.metrics.pairwise import cosine_similarity
import ollama
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions
import warnings
warnings.filterwarnings('ignore')

print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


# OLLAMA SETUP AND MODEL PULLING (WITH FALLBACK)

In [None]:
# Download the Ollama models the notebook relies on. If anything goes wrong
# while talking to Ollama, load a local sentence-transformers model instead.
print("🔄 Pulling Ollama models...")

MODELS_READY = False

# (model tag, message printed before the pull, message printed after it)
pull_steps = [
    # LLM used for text generation
    ('llama3.2:3b',
     "Pulling llama3.2:3b model...",
     "✅ llama3.2:3b model pulled successfully!"),
    # Embedding model
    ('nomic-embed-text',
     "Pulling nomic-embed-text model...",
     "✅ nomic-embed-text model pulled successfully!"),
]

try:
    # A successful list() call shows the Ollama server can be reached
    ollama.list()
    print("✅ Ollama is accessible!")

    for model_tag, start_message, done_message in pull_steps:
        print(start_message)
        ollama.pull(model_tag)
        print(done_message)

except Exception as e:
    print(f"❌ Error with Ollama: {e}")
    print("🔄 Setting up fallback embedding model...")

    # The fallback is provided by sentence-transformers
    from sentence_transformers import SentenceTransformer

    try:
        # Small general-purpose embedding model
        fallback_model = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as fallback_error:
        print(f"❌ Fallback model error: {fallback_error}")
        print("Please ensure you have internet connection for model download.")
    else:
        print("✅ Fallback embedding model loaded successfully!")
        MODELS_READY = True

else:
    # Reached only when the server check and both pulls succeeded
    MODELS_READY = True

🔄 Pulling Ollama models...
✅ Ollama is accessible!
Pulling llama3.2:3b model...
✅ llama3.2:3b model pulled successfully!
Pulling nomic-embed-text model...
✅ nomic-embed-text model pulled successfully!


# WEB SCRAPING PYDANTIC AI DOCUMENTATION

In [None]:
def scrape_pydantic_ai_content(url: str) -> Dict[str, Any]:
    """
    Scrape content from PydanticAI documentation website

    The page is fetched with a browser-like User-Agent, parsed with
    BeautifulSoup, and split into sections: one per heading inside the main
    content container, holding the text of the siblings that follow the
    heading up to the next heading. If no such section is found, every
    paragraph longer than 50 characters becomes its own section.

    Args:
        url: Address of the page to download.

    Returns:
        A dict with three keys:
        ``'title'`` (text of the ``<title>`` tag, or ``''``),
        ``'sections'`` (list of ``{'heading': str, 'content': str}`` dicts),
        ``'full_text'`` (all extracted text as a single string).
        On any error the problem is printed and the same structure is
        returned with empty values.
    """
    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    try:
        print(f"🔄 Scraping content from: {url}")

        # Send GET request with headers to mimic browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        # Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract main content areas
        content_data = {
            'title': '',
            'sections': [],
            'full_text': ''
        }

        # Extract title
        title = soup.find('title')
        if title:
            content_data['title'] = title.get_text().strip()

        # Extract main content sections
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content')

        if main_content:
            # Extract all text content
            content_data['full_text'] = main_content.get_text(separator=' ', strip=True)

            # Extract sections with headings
            for heading in main_content.find_all(heading_tags):
                section_text = heading.get_text().strip()

                # Get content following the heading
                next_content = []
                for sibling in heading.next_siblings:
                    if sibling.name in heading_tags:
                        break
                    if hasattr(sibling, 'get_text'):
                        # A separator is required here: without it the text of
                        # nested inline elements (links, code, emphasis) is
                        # concatenated with no space between the pieces.
                        text = sibling.get_text(separator=' ', strip=True)
                        if text:
                            next_content.append(text)

                if next_content:
                    content_data['sections'].append({
                        'heading': section_text,
                        'content': ' '.join(next_content)
                    })

        # If no structured content found, extract all paragraphs
        if not content_data['sections']:
            for i, p in enumerate(soup.find_all('p')):
                text = p.get_text(separator=' ', strip=True)
                if text and len(text) > 50:  # Only include substantial paragraphs
                    content_data['sections'].append({
                        'heading': f'Section {i+1}',
                        'content': text
                    })

        # Without a main container nothing has filled full_text yet; build it
        # from the sections so the reported length matches what was scraped.
        if not content_data['full_text']:
            content_data['full_text'] = ' '.join(
                section['content'] for section in content_data['sections']
            )

        print(f"✅ Successfully scraped {len(content_data['sections'])} sections")
        print(f"📄 Total content length: {len(content_data['full_text'])} characters")

        return content_data

    except Exception as e:
        # Deliberate best-effort boundary: callers always get the same dict
        # shape, so a failed scrape is reported and returned as empty content.
        print(f"❌ Error scraping content: {e}")
        return {'title': '', 'sections': [], 'full_text': ''}

# Fetch the PydanticAI landing page and keep the parsed result for later cells
pydantic_url = "https://ai.pydantic.dev/#next-steps"
scraped_content = scrape_pydantic_ai_content(pydantic_url)

# Short report of what was scraped
divider = "="*50
section_total = len(scraped_content['sections'])
text_length = len(scraped_content['full_text'])

print("\n" + divider)
print("📋 SCRAPED CONTENT SUMMARY")
print(divider)
print(f"Title: {scraped_content['title']}")
print(f"Number of sections: {section_total}")
print(f"Total content length: {text_length} characters")

# Preview of the leading sections (heading plus first 200 characters)
print("\n📖 First 3 sections:")
for number, section in enumerate(scraped_content['sections'][:3], start=1):
    preview = section['content'][:200]
    print(f"\n{number}. {section['heading']}")
    print(f"   Content preview: {preview}...")

🔄 Scraping content from: https://ai.pydantic.dev/#next-steps
✅ Successfully scraped 7 sections
📄 Total content length: 12167 characters

📋 SCRAPED CONTENT SUMMARY
Title: PydanticAI
Number of sections: 7
Total content length: 12167 characters

📖 First 3 sections:

1. Introduction
   Content preview: Agent Framework / shim to use Pydantic with LLMs PydanticAI is a Python agent framework designed to make it less painful to
  build production grade applications with Generative AI. FastAPI revolution...

2. Why use PydanticAI
   Content preview: Built by the Pydantic Team:
Built by the team behindPydantic(the validation layer of the OpenAI SDK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, CrewAI, Instructor and many more)....

3. Hello World Example
   Content preview: Here's a minimal example of PydanticAI: hello_world.pyfrompydantic_aiimportAgentagent=Agent(# (1)!'google-gla:gemini-1.5-flash',system_prompt='Be concise, reply with one sentence.',# (2)!)result=agent...


# DOCUMENT PROCESSING AND CHUNKING

In [None]:
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
    """
    Split text into overlapping chunks for better retrieval

    Each chunk spans at most ``chunk_size`` characters. When a chunk does not
    reach the end of the text and a period lies in its second half, the chunk
    is cut right after that period. The next chunk starts ``overlap``
    characters before the end of the previous one.

    Args:
        text: Text to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must be
            smaller than ``chunk_size``.

    Returns:
        The whitespace-stripped, non-empty chunks in document order.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window could never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    chunks = []
    text_length = len(text)
    start = 0

    while start < text_length:
        end = min(start + chunk_size, text_length)

        # Try to break at sentence boundary
        if end < text_length:
            last_period = text.rfind('.', start, end)
            if last_period != -1 and last_period > start + chunk_size // 2:
                end = last_period + 1

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        # The text is exhausted: stepping back by `overlap` from here would
        # only emit a tail that is already fully contained in this chunk.
        if end >= text_length:
            break

        # A sentence cut can leave a chunk no longer than the overlap; in that
        # case continue from `end` so the window always moves forward.
        next_start = end - overlap
        start = next_start if next_start > start else end

    return chunks

# Turn the scraped sections into documents, then into retrieval chunks
print("\n🔄 Processing and chunking content...")

# One document per section: the heading and the body joined by a newline
documents = [
    {
        'text': f"{section['heading']}\n{section['content']}",
        'heading': section['heading'],
        'content': section['content']
    }
    for section in scraped_content['sections']
]

# Every chunk remembers the heading of the document it was cut from
all_chunks = [
    {
        'text': piece,
        'source': doc['heading']
    }
    for doc in documents
    for piece in chunk_text(doc['text'], chunk_size=800, overlap=150)
]

print(f"✅ Created {len(all_chunks)} chunks from {len(documents)} documents")


🔄 Processing and chunking content...
✅ Created 19 chunks from 7 documents


# EMBEDDING GENERATION AND VECTOR STORE (WITH FALLBACK)

In [None]:
def get_embedding(text: str) -> List[float]:
    """
    Return an embedding vector for ``text``.

    The Ollama 'nomic-embed-text' model is asked first. If that call raises,
    the module-level ``fallback_model`` is used when one has been defined.
    An empty list is returned (after printing the reason) when neither path
    produces a vector.
    """
    try:
        # Preferred path: Ollama embedding endpoint
        return ollama.embeddings(model='nomic-embed-text', prompt=text)['embedding']
    except Exception as ollama_error:
        # The fallback only exists if the setup cell had to create it
        if 'fallback_model' not in globals():
            print(f"No embedding model available: {ollama_error}")
            return []

        try:
            vector = fallback_model.encode(text)
            return vector.tolist()
        except Exception as fallback_error:
            print(f"Fallback embedding error: {fallback_error}")
            return []

# Embed every chunk, keeping only the chunks for which a vector was produced
print("\n🔄 Generating embeddings for all chunks...")

chunk_embeddings = []
valid_chunks = []
total_chunks = len(all_chunks)

for position, chunk in enumerate(all_chunks, start=1):
    # '\r' rewrites the same console line to act as a progress indicator
    print(f"Processing chunk {position}/{total_chunks}", end='\r')

    vector = get_embedding(chunk['text'])
    if not vector:
        continue
    chunk_embeddings.append(vector)
    valid_chunks.append(chunk)

print(f"\n✅ Generated {len(chunk_embeddings)} embeddings")

if not chunk_embeddings:
    print("❌ No embeddings generated. Please check your setup.")
    print("💡 Trying alternative approach...")

    # Random stand-in vectors so the remaining cells can still be executed
    print("Creating demonstration embeddings...")
    import random

    # Only the first 10 chunks receive a 384-value random vector
    for chunk in all_chunks[:10]:
        chunk_embeddings.append([random.random() for _ in range(384)])
        valid_chunks.append(chunk)

    print(f"✅ Created {len(chunk_embeddings)} demonstration embeddings")

# One row per chunk; used for the similarity computation during retrieval
embeddings_matrix = np.array(chunk_embeddings)


🔄 Generating embeddings for all chunks...

✅ Generated 19 embeddings


# RETRIEVAL SYSTEM

In [None]:
def retrieve_relevant_chunks(query: str, top_k: int = 3) -> List[Dict[str, Any]]:
    """
    Retrieve most relevant chunks for a given query

    The query is embedded with ``get_embedding`` and compared against every
    row of the module-level ``embeddings_matrix`` using cosine similarity.

    Args:
        query: Question or search text.
        top_k: Maximum number of chunks to return. Values of zero or less
            yield an empty list.

    Returns:
        Up to ``top_k`` dicts with keys ``'text'``, ``'source'`` and
        ``'similarity'``, ordered from most to least similar. An empty list
        is returned when the query cannot be embedded or an error occurs.
    """
    # Without this guard top_k == 0 would select everything, because the
    # slice [-0:] is the whole array.
    if top_k <= 0:
        return []

    try:
        # Generate query embedding
        query_embedding = get_embedding(query)
        if not query_embedding:
            print("❌ Could not generate query embedding")
            return []

        # Calculate similarities
        query_vec = np.array(query_embedding).reshape(1, -1)
        similarities = cosine_similarity(query_vec, embeddings_matrix)[0]

        # Get top-k most similar chunks (argsort is ascending, so reverse it)
        top_indices = np.argsort(similarities)[::-1][:top_k]

        return [
            {
                'text': valid_chunks[idx]['text'],
                'source': valid_chunks[idx]['source'],
                'similarity': similarities[idx]
            }
            for idx in top_indices
        ]

    except Exception as e:
        # Best-effort: a retrieval failure is reported and treated as "no hits"
        print(f"Error in retrieval: {e}")
        return []

# Smoke test: run one sample question through the retriever and show the hits
print("\n🔍 Testing retrieval system...")
test_query = "What is PydanticAI?"
test_results = retrieve_relevant_chunks(test_query)

print(f"Query: {test_query}")
print(f"Retrieved {len(test_results)} relevant chunks:")
for rank, result in enumerate(test_results, start=1):
    snippet = result['text'][:200]
    print(f"\n{rank}. Source: {result['source']}")
    print(f"   Similarity: {result['similarity']:.4f}")
    print(f"   Text: {snippet}...")


🔍 Testing retrieval system...
Query: What is PydanticAI?
Retrieved 3 relevant chunks:

1. Source: Next Steps
   Similarity: 0.7647
   Text: Next Steps
To try PydanticAI yourself, follow the instructionsin the examples. Read thedocsto learn more about building applications with PydanticAI. Read theAPI Referenceto understand PydanticAI's in...

2. Source: Why use PydanticAI
   Similarity: 0.7108
   Text: Why use PydanticAI
Built by the Pydantic Team:
Built by the team behindPydantic(the validation layer of the OpenAI SDK, the Anthropic SDK, LangChain, LlamaIndex, AutoGPT, Transformers, CrewAI, Instruc...

3. Source: Introduction
   Similarity: 0.7031
   Text: Introduction
Agent Framework / shim to use Pydantic with LLMs PydanticAI is a Python agent framework designed to make it less painful to
  build production grade applications with Generative AI. FastA...


# LLM RESPONSE GENERATION

In [None]:
def generate_response(query: str, context_chunks: List[Dict[str, Any]]) -> str:
    """
    Generate response using Ollama LLM with retrieved context

    The chunks are numbered and placed in a prompt together with the
    question, and the prompt is sent to the 'llama3.2:3b' model.

    Args:
        query: The user's question.
        context_chunks: Retrieved chunks; each must provide a ``'text'`` key.

    Returns:
        The model's answer. If the Ollama call fails, the first 300
        characters of the best chunk are returned instead (or an apology when
        there are no chunks). Any other error yields a generic apology.
    """
    try:
        # Prepare context
        context = "\n\n".join(
            f"Context {i+1}:\n{chunk['text']}" for i, chunk in enumerate(context_chunks)
        )

        # Create prompt
        prompt = f"""You are a helpful assistant that answers questions about PydanticAI based on the provided context.

Context Information:
{context}

Question: {query}

Please provide a comprehensive answer based on the context provided. If the information is not available in the context, please say so.

Answer:"""

        # Try to generate response using Ollama
        try:
            response = ollama.generate(
                model='llama3.2:3b',
                prompt=prompt,
                options={
                    'temperature': 0.7,
                    # Ollama names its output-length cap `num_predict`;
                    # `max_tokens` is not one of its options, so the limit
                    # was never applied under that key.
                    'num_predict': 500,
                    'top_p': 0.9
                }
            )
            return response['response']

        except Exception as ollama_error:
            print(f"❌ Ollama LLM error: {ollama_error}")

            # Fallback to basic response
            if context_chunks:
                return f"Based on the available context about PydanticAI: {context_chunks[0]['text'][:300]}..."
            else:
                return "I apologize, but I couldn't find relevant information to answer your question about PydanticAI."

    except Exception as e:
        # Reached, for example, when a chunk lacks the 'text' key
        print(f"Error generating response: {e}")
        return "I apologize, but I encountered an error while generating a response."

# COMPLETE FAQ BOT CLASS

In [None]:
class PydanticAIFAQBot:
    """
    Question-answering front end for the scraped PydanticAI content.

    ``ask`` retrieves context with the module-level
    ``retrieve_relevant_chunks`` and produces the answer with the
    module-level ``generate_response``.
    """

    def __init__(self):
        # References to the notebook-level vector store. Retrieval itself goes
        # through retrieve_relevant_chunks, which reads the module-level
        # objects directly.
        self.embeddings_matrix = embeddings_matrix
        self.chunks = valid_chunks

    def ask(self, question: str) -> Dict[str, Any]:
        """
        Ask a question to the FAQ bot

        Args:
            question: The question to answer.

        Returns:
            A dict with the keys ``'question'``, ``'answer'``, ``'sources'``
            (headings of the chunks used) and ``'relevance_scores'``
            (similarity of each chunk). Both lists are empty when no chunk
            could be retrieved.
        """
        print(f"\n❓ Question: {question}")
        print("-" * 50)

        # Retrieve relevant context
        relevant_chunks = retrieve_relevant_chunks(question, top_k=3)

        if not relevant_chunks:
            # Same keys as the normal response so that code reading
            # 'relevance_scores' works for every result.
            return {
                'question': question,
                'answer': "I couldn't find relevant information to answer your question.",
                'sources': [],
                'relevance_scores': []
            }

        # Generate response
        answer = generate_response(question, relevant_chunks)

        # Prepare response
        response = {
            'question': question,
            'answer': answer,
            'sources': [chunk['source'] for chunk in relevant_chunks],
            'relevance_scores': [chunk['similarity'] for chunk in relevant_chunks]
        }

        print(f"🤖 Answer: {answer}")
        print(f"\n📚 Sources: {', '.join(response['sources'])}")
        print(f"🎯 Relevance scores: {[f'{score:.3f}' for score in response['relevance_scores']]}")

        return response

# Create the bot instance that the following cells query
faq_bot = PydanticAIFAQBot()

banner = "="*60
print("\n" + banner)
print("🤖 PYDANTIC AI FAQ BOT INITIALIZED")
print(banner)


🤖 PYDANTIC AI FAQ BOT INITIALIZED


# QUESTION-ANSWERING TASK

In [None]:
# Conceptual questions about PydanticAI that the bot is asked in this session
questions = [
    "What is PydanticAI and why was it created?",
    "How is PydanticAI similar to FastAPI in terms of developer experience?",
    "What makes PydanticAI type-safe and structured?",
    "How does PydanticAI support streaming and debugging?",
    "What is llms.txt?"
]

print("\n" + "="*60)
print("📝 QUESTION-ANSWERING SESSION")
print("="*60)

# Store all responses
all_responses = []

# Taken from the list so the "i/N" header stays right if questions are
# added or removed.
total_questions = len(questions)

for i, question in enumerate(questions, 1):
    print(f"\n{'='*20} QUESTION {i}/{total_questions} {'='*20}")
    response = faq_bot.ask(question)
    all_responses.append(response)
    print("\n" + "="*60)


📝 QUESTION-ANSWERING SESSION


❓ Question: What is PydanticAI and why was it created?
--------------------------------------------------
🤖 Answer: Based on the context provided, PydanticAI appears to be a Python agent framework designed to make it easier to build production-grade applications with Generative AI (LLMs). According to Context 3, PydanticAI was created with the aim of bringing the same feeling and ergonomic design experience that FastAPI offers to GenAI app development.

While the context does not explicitly state what PydanticAI is or its full purpose, it can be inferred that it is a framework built on top of Pydantic, which is a popular validation layer used in various OpenAI, Anthropic, and other AI-related SDKs. By leveraging Pydantic's strengths, PydanticAI aims to simplify the process of developing applications with LLMs, making it more accessible and user-friendly for developers.

In summary, PydanticAI was created to provide a Python agent framework that enables e

# SUMMARY AND RESULTS

In [None]:
# Final report: pipeline statistics, models used and per-question relevance
print("\n" + "="*60)
print("📊 SUMMARY OF FAQ BOT PERFORMANCE")
print("="*60)

print(f"✅ Successfully scraped content from: {pydantic_url}")
print(f"✅ Processed {len(documents)} documents into {len(all_chunks)} chunks")
print(f"✅ Generated {len(chunk_embeddings)} embeddings using 'nomic-embed-text'")
print(f"✅ Used 'llama3.2:3b' model for response generation")
print(f"✅ Answered {len(questions)} questions successfully")

print("\n📋 MODELS USED:")
print("- LLM Model: llama3.2:3b (for text generation)")
print("- Embedding Model: nomic-embed-text (for semantic search)")

print("\n🔧 TECHNICAL APPROACH:")
print("- Content Scraping: BeautifulSoup for HTML parsing")
print("- Text Processing: Chunking with overlap for better retrieval")
print("- Embedding Generation: Ollama embedding API")
print("- Similarity Search: Cosine similarity for relevant chunk retrieval")
print("- Response Generation: Context-aware prompting with Ollama LLM")

print("\n💡 AVERAGE RELEVANCE SCORES:")
for i, response in enumerate(all_responses, 1):
    # A response produced without any retrieved chunk may carry no scores;
    # report that instead of failing on the missing key or averaging nothing.
    scores = response.get('relevance_scores')
    if scores:
        avg_score = np.mean(scores)
        print(f"Question {i}: {avg_score:.3f}")
    else:
        print(f"Question {i}: n/a (no relevant chunks retrieved)")

print("\n🎉 FAQ Bot setup and testing completed successfully!")
print("="*60)


📊 SUMMARY OF FAQ BOT PERFORMANCE
✅ Successfully scraped content from: https://ai.pydantic.dev/#next-steps
✅ Processed 7 documents into 19 chunks
✅ Generated 19 embeddings using 'nomic-embed-text'
✅ Used 'llama3.2:3b' model for response generation
✅ Answered 5 questions successfully

📋 MODELS USED:
- LLM Model: llama3.2:3b (for text generation)
- Embedding Model: nomic-embed-text (for semantic search)

🔧 TECHNICAL APPROACH:
- Content Scraping: BeautifulSoup for HTML parsing
- Text Processing: Chunking with overlap for better retrieval
- Embedding Generation: Ollama embedding API
- Similarity Search: Cosine similarity for relevant chunk retrieval
- Response Generation: Context-aware prompting with Ollama LLM

💡 AVERAGE RELEVANCE SCORES:
Question 1: 0.729
Question 2: 0.746
Question 3: 0.701
Question 4: 0.678
Question 5: 0.644

🎉 FAQ Bot setup and testing completed successfully!


# OPTIONAL: INTERACTIVE TESTING

In [None]:
# Usage hint for the reader, followed by one more sample question
usage_lines = (
    "\n🔧 FAQ Bot is ready for interactive use!",
    "You can now ask questions about PydanticAI using: faq_bot.ask('your question here')",
    # Example of additional usage
    "\n💡 Example of additional usage:",
)
for usage_line in usage_lines:
    print(usage_line)

example_response = faq_bot.ask("How does PydanticAI handle type validation?")


🔧 FAQ Bot is ready for interactive use!
You can now ask questions about PydanticAI using: faq_bot.ask('your question here')

💡 Example of additional usage:

❓ Question: How does PydanticAI handle type validation?
--------------------------------------------------
🤖 Answer: Based on the provided context, PydanticAI handles type validation through its design as a Type-safe model. The context states that PydanticAI "Designed to make type checking as powerful and informative as possible for you." This suggests that PydanticAI has been built with type safety in mind, allowing users to take advantage of powerful and informative type checking.

Furthermore, the context mentions that the agent is typed as a `SupportOutput`, which implies that PydanticAI provides strong typing and validation capabilities. Additionally, it's mentioned that if validation fails, reflection will occur, and the agent will be prompted to try again, indicating that PydanticAI has robust type checking mechanisms in pl