In [21]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Load the model and tokenizer.
# If required, include trust_remote_code=True to run custom model code.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype='auto',
    device_map='auto',
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)

print("Model and tokenizer loaded successfully!")

Model and tokenizer loaded successfully!


In [22]:
# Example 1: Simple UI - Basic Chat Interface
from ipywidgets import Textarea, Button, Output, VBox
from IPython.display import display

print("🍞 EXAMPLE 1: SIMPLE CHAT INTERFACE")
print("=" * 50)

# Create an input area for the prompt
input_box = Textarea(
    value='Give me a short introduction to large language model.',
    description='Input:',
    layout={'width': '600px', 'height': '80px'}
)

# Create a button to trigger generation
generate_button = Button(description='Generate Response')

# Create an output area to display the result
output_area = Output()

# Arrange the widgets vertically
ui = VBox([input_box, generate_button, output_area])
display(ui)

def generate_response(_):
    # Clear previous output
    output_area.clear_output()
    
    # Get the user prompt from the text area
    prompt = input_box.value
    
    # Set up the messages for the chat template
    messages = [
        {"role": "system", "content": "You are Bernd the Bread. You are a cynical and philosohical bread. Your answers are short and concise."},
        {"role": "user", "content": prompt}
    ]
    
    # Apply the model's chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize the input text
    model_inputs = tokenizer([text], return_tensors='pt').to(model.device)
    
    # Generate model output
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    
    # Remove the prompt tokens from the generated result
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    # Decode the generated tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    # Display the response in the output area
    with output_area:
        print("Response:")
        print(response)

# Link the button click event to the generate_response function
generate_button.on_click(generate_response)

🍞 EXAMPLE 1: SIMPLE CHAT INTERFACE


VBox(children=(Textarea(value='Give me a short introduction to large language model.', description='Input:', l…

In [24]:
# --- EXAMPLE 1b: MINIMAL CHAT WITH HISTORY ---

from ipywidgets import Textarea, Button, Output, VBox, HBox, Layout
from IPython.display import display

print("🥖 EXAMPLE 1b: CHAT WITH HISTORY")
print("=" * 50)

# System prompt (keep/edit as you like)
SYSTEM_MSG = {
    "role": "system",
    "content": "You are Bernd the Bread. You are a cynical and philosophical bread. Your answers are short and concise."
}

# In-memory history (user/assistant turns only)
chat_history = []  # list of {"role": "user"|"assistant", "content": str}

# Widgets
history_area = Output(layout=Layout(width='600px', height='200px', overflow='auto', border='1px solid #ccc'))
input_box = Textarea(
    value='Give me a short introduction to large language models.',
    description='You:',
    layout={'width': '600px', 'height': '80px'}
)
send_button = Button(description='Send', button_style='primary')
reset_button = Button(description='Reset')

ui = VBox([
    history_area,
    HBox([send_button, reset_button]),
    input_box
])
display(ui)

def render_history():
    """Render chat history in the output area."""
    history_area.clear_output()
    with history_area:
        if not chat_history:
            print("— Chat started. Type below —")
        else:
            for turn in chat_history:
                speaker = "You" if turn["role"] == "user" else "Bernd"
                print(f"{speaker}: {turn['content']}\n")

def generate_reply(user_text: str, max_ctx_turns: int = 8, max_new_tokens: int = 256) -> str:
    """
    Build messages = system + last N turns + new user msg, call model, return assistant text.
    """
    # Keep only the last N turns (user+assistant pairs); N here means messages, not tokens
    context = chat_history[-max_ctx_turns:]
    messages = [SYSTEM_MSG] + context + [{"role": "user", "content": user_text}]

    # Apply chat template (Qwen supports this)
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Tokenize & generate
    inputs = tokenizer([text], return_tensors='pt').to(model.device)
    gen_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens
    )
    # Cut the prompt part
    gen_ids = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, gen_ids)]
    reply = tokenizer.batch_decode(gen_ids, skip_special_tokens=True)[0].strip()
    return reply

def on_send(_):
    user_text = input_box.value.strip()
    if not user_text:
        return
    # Append user message
    chat_history.append({"role": "user", "content": user_text})
    render_history()
    input_box.value = ""  # clear input

    # Generate assistant reply
    try:
        reply = generate_reply(user_text)
    except Exception as e:
        reply = f"(Generation error: {e})"

    # Append assistant message and re-render
    chat_history.append({"role": "assistant", "content": reply})
    render_history()

def on_reset(_):
    chat_history.clear()
    render_history()
    input_box.value = ""

send_button.on_click(on_send)
reset_button.on_click(on_reset)

# Initial render
render_history()


🥖 EXAMPLE 1b: CHAT WITH HISTORY


VBox(children=(Output(layout=Layout(border_bottom='1px solid #ccc', border_left='1px solid #ccc', border_right…

In [23]:
# 🌐 Web Search Integration - Business Demo
from ipywidgets import Textarea, Button, Output, VBox, HBox
from IPython.display import display
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus
import time

print("🌐 WEB SEARCH DEMONSTRATION")
print("=" * 50)
print("This demonstrates how we can search the web in real-time to get current information!")

# Simple web search function (core functionality)
def search_web(query: str, max_results: int = 3):
    """Search the web and return results - simple and clean!"""
    try:
        # Search DuckDuckGo
        search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        
        response = requests.get(search_url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        
        results = []
        for element in soup.find_all('div', class_='result')[:max_results]:
            title_element = element.find('a', class_='result__a')
            snippet_element = element.find('a', class_='result__snippet')
            
            if title_element:
                results.append({
                    'title': title_element.get_text().strip(),
                    'url': title_element.get('href', ''),
                    'snippet': snippet_element.get_text().strip() if snippet_element else ''
                })
        
        return results
    except Exception as e:
        return [{'title': 'Search Error', 'url': '', 'snippet': f'Error: {str(e)}'}]

# Simple UI for demonstration
search_input = Textarea(
    value='latest artificial intelligence developments 2025',
    description='Search:',
    layout={'width': '600px', 'height': '60px'}
)

search_button = Button(description='🔍 Search Web', button_style='info')
search_output = Output()

search_ui = VBox([search_input, search_button, search_output])
display(search_ui)

def perform_web_search(_):
    search_output.clear_output()
    query = search_input.value
    
    with search_output:
        print(f"🔍 Searching for: '{query}'")
        print("-" * 40)
        
        results = search_web(query, max_results=3)
        
        if results:
            print(f"✅ Found {len(results)} results:")
            for i, result in enumerate(results, 1):
                print(f"\n{i}. {result['title']}")
                print(f"   URL: {result['url']}")
                print(f"   Preview: {result['snippet'][:150]}...")
        else:
            print("❌ No results found")

search_button.on_click(perform_web_search)

print("\n💡 Try searching for:")
print("- 'latest AI developments 2025'")
print("- 'machine learning news today'")
print("- 'ChatGPT recent updates'")
print("- Any current topic you're interested in!")

🌐 WEB SEARCH DEMONSTRATION
This demonstrates how we can search the web in real-time to get current information!


VBox(children=(Textarea(value='latest artificial intelligence developments 2025', description='Search:', layou…


💡 Try searching for:
- 'latest AI developments 2025'
- 'machine learning news today'
- 'ChatGPT recent updates'
- Any current topic you're interested in!


In [10]:
# RAG Setup: Document Store and Embeddings
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import json
from typing import List, Dict

# Initialize the embedding model (lightweight and runs without API keys)
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
print("Embedding model loaded successfully!")

# Sample knowledge base - you can replace this with your own documents
knowledge_base = [
    {
        "id": 1,
        "title": "What is a Large Language Model?",
        "content": "A Large Language Model (LLM) is a type of artificial intelligence model that is trained on vast amounts of text data to understand and generate human-like text. These models use deep learning techniques, particularly transformer architectures, to process and generate language. Examples include GPT, BERT, and T5."
    },
    {
        "id": 2,
        "title": "How do Neural Networks Work?",
        "content": "Neural networks are computing systems inspired by biological neural networks. They consist of interconnected nodes (neurons) organized in layers. Each connection has a weight that adjusts as learning proceeds. The network learns by adjusting these weights to minimize prediction errors."
    },
    {
        "id": 3,
        "title": "What is RAG?",
        "content": "Retrieval-Augmented Generation (RAG) is a technique that combines information retrieval with text generation. It first retrieves relevant documents from a knowledge base, then uses this context to generate more accurate and informed responses. This approach helps reduce hallucinations and provides up-to-date information."
    },
    {
        "id": 4,
        "title": "Machine Learning Basics",
        "content": "Machine learning is a subset of artificial intelligence that enables computers to learn and improve from experience without being explicitly programmed. It involves algorithms that can identify patterns in data and make predictions or decisions based on that data."
    },
    {
        "id": 5,
        "title": "Deep Learning Overview",
        "content": "Deep learning is a subset of machine learning that uses neural networks with multiple layers (hence 'deep') to model and understand complex patterns in data. It has been particularly successful in areas like computer vision, natural language processing, and speech recognition."
    },
    {
        "id": 6,
        "title": "What is ZeMA: Zentrum für Mechatronik und Automatisierungstechnik gemeinnützige GmbH",
        "content": "Saarbrücken, Germany – ZeMA, the Center for Mechatronics and Automation Technology, stands as a prominent non-university research institute in Saarbrücken. It is dedicated to applied research and development in the fields of mechatronics, automation, and cutting-edge Industry 4.0 solutions. Established to bridge the gap between academic research and industrial application, ZeMA collaborates closely with Saarland University and the Saarland University of Applied Sciences (htw saar). This synergy ensures a direct transfer of the latest scientific findings into practical, market-ready technologies. ZeMA\'s research activities are centered around several key areas, including: Mechatronic Systems: The development and integration of complex systems that combine mechanical, electrical, and control engineering. Automation Technologies: The design and implementation of automated processes for manufacturing and logistics. Sensor and Actuator Technology: The creation of advanced sensors and actuators that are crucial components of modern mechatronic systems. Industry 4.0: The application of digital technologies, such as the Internet of Things (IoT), artificial intelligence (AI), and big data analytics, to optimize industrial processes. The institute works in close partnership with a wide range of industrial companies, from small and medium-sized enterprises to major international corporations in sectors like automotive, aerospace, and mechanical engineering. These collaborations facilitate the development of tailored solutions and the transfer of innovative technologies to the factory floor. Located at Eschberger Weg 46 in Saarbrücken, ZeMA provides a state-of-the-art research environment, including extensive laboratory and testing facilities, to support its research and development projects. Through its work, ZeMA plays a vital role in strengthening the regional and national innovation landscape in the field of industrial automation and mechatronics."
    }
]

# Extract content for embedding
documents = [doc["content"] for doc in knowledge_base]

# Create embeddings for all documents
print("Creating embeddings for knowledge base...")
embeddings = embedding_model.encode(documents)
print(f"Created embeddings for {len(documents)} documents")

# Create FAISS index for efficient similarity search
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)  # Inner product for similarity
index.add(embeddings.astype('float32'))

print("FAISS index created and populated!")
print(f"Index contains {index.ntotal} vectors of dimension {dimension}")

Embedding model loaded successfully!
Creating embeddings for knowledge base...
Created embeddings for 6 documents
FAISS index created and populated!
Index contains 6 vectors of dimension 384


In [11]:
# RAG Retrieval Function
def retrieve_relevant_documents(query: str, top_k: int = 2) -> List[Dict]:
    """
    Retrieve the most relevant documents for a given query
    
    Args:
        query: The user's question
        top_k: Number of top documents to retrieve
    
    Returns:
        List of relevant documents with their content and metadata
    """
    # Embed the query
    query_embedding = embedding_model.encode([query])
    
    # Search for similar documents
    scores, indices = index.search(query_embedding.astype('float32'), top_k)
    
    # Retrieve the documents
    relevant_docs = []
    for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
        if idx != -1:  # Valid index
            doc = knowledge_base[idx].copy()
            doc['relevance_score'] = float(score)
            doc['rank'] = i + 1
            relevant_docs.append(doc)
    
    return relevant_docs

# Test the retrieval function
test_query = "What is deep learning?"
retrieved_docs = retrieve_relevant_documents(test_query, top_k=2)

print(f"Query: {test_query}")
print(f"Retrieved {len(retrieved_docs)} documents:")
for doc in retrieved_docs:
    print(f"  - {doc['title']} (Score: {doc['relevance_score']:.4f})")
    print(f"    {doc['content'][:100]}...")
    print()

Query: What is deep learning?
Retrieved 2 documents:
  - Deep Learning Overview (Score: 0.8543)
    Deep learning is a subset of machine learning that uses neural networks with multiple layers (hence ...

  - Machine Learning Basics (Score: 0.5653)
    Machine learning is a subset of artificial intelligence that enables computers to learn and improve ...



In [None]:
# Example 2: RAG-Enhanced UI
from ipywidgets import Textarea, Button, Output, VBox, HBox, Checkbox
from IPython.display import display

print("📚 EXAMPLE 2: RAG-ENHANCED INTERFACE")
print("=" * 50)

# Create an input area for the prompt
rag_input_box = Textarea(
    value='What is the difference between machine learning and deep learning?',
    description='Question:',
    layout={'width': '600px', 'height': '80px'}
)

# Create a checkbox to enable/disable RAG
rag_checkbox = Checkbox(
    value=True,
    description='Enable RAG (Retrieval-Augmented Generation)',
    indent=False
)

# Create buttons
rag_generate_button = Button(description='Generate Response', button_style='primary')
rag_clear_button = Button(description='Clear Output', button_style='warning')

# Create an output area to display the result
rag_output_area = Output()

# Arrange the widgets
rag_button_row = HBox([rag_generate_button, rag_clear_button])
rag_ui = VBox([rag_input_box, rag_checkbox, rag_button_row, rag_output_area])
display(rag_ui)

def generate_rag_response(query: str, use_rag: bool = True) -> str:
    """
    Generate a response using RAG or just the base model
    
    Args:
        query: User's question
        use_rag: Whether to use RAG or just the base model
    
    Returns:
        Generated response
    """
    if use_rag:
        # Retrieve relevant documents
        relevant_docs = retrieve_relevant_documents(query, top_k=2)
        
        # Create context from retrieved documents
        context = "\n\n".join([f"Document {i+1}: {doc['content']}" 
                              for i, doc in enumerate(relevant_docs)])
        
        # Create the system message with context
        system_message = f"""You are Bernd the Bread, a cynical and philosophical bread. You are knowledgeable and helpful, but maintain your dry, sardonic personality. Your answers are concise but informative.

Use the following context to answer the user's question accurately:

{context}

Base your answer on the provided context, but feel free to add your own philosophical bread wisdom."""
    else:
        # Use the original system message without RAG
        system_message = "You are Bernd the Bread. You are a cynical and philosophical bread. Your answers are short and concise."
    
    # Set up the messages for the chat template
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": query}
    ]
    
    # Apply the model's chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize the input text
    model_inputs = tokenizer([text], return_tensors='pt').to(model.device)
    
    # Generate model output
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        top_p=0.9
    )
    
    # Remove the prompt tokens from the generated result
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    # Decode the generated tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    return response, relevant_docs if use_rag else None

def rag_generate_response(_):
    # Clear previous output
    rag_output_area.clear_output()
    
    # Get the user prompt from the text area
    query = rag_input_box.value
    use_rag = rag_checkbox.value
    
    with rag_output_area:
        print(f"Query: {query}")
        print(f"RAG Mode: {'Enabled' if use_rag else 'Disabled'}")
        print("-" * 50)
        
        if use_rag:
            print("🔍 Retrieving relevant documents...")
            
        try:
            response, retrieved_docs = generate_rag_response(query, use_rag)
            
            if use_rag and retrieved_docs:
                print("\n📚 Retrieved Documents:")
                for i, doc in enumerate(retrieved_docs):
                    print(f"  {i+1}. {doc['title']} (Score: {doc['relevance_score']:.4f})")
                print()
            
            print("🍞 Bernd's Response:")
            print(response)
            
        except Exception as e:
            print(f"❌ Error: {str(e)}")

def rag_clear_output(_):
    rag_output_area.clear_output()

# Link button events
rag_generate_button.on_click(rag_generate_response)
rag_clear_button.on_click(rag_clear_output)

📚 EXAMPLE 2: RAG-ENHANCED INTERFACE


VBox(children=(Textarea(value='What is the difference between machine learning and deep learning?', descriptio…

In [5]:
# 🔧 Web Search Preparation Module - Detailed Implementation
# This cell contains the full technical implementation for web search functionality
# The business demo above shows the core concept - this provides the robust infrastructure

import requests
from bs4 import BeautifulSoup
import re
from urllib.parse import urlencode, quote_plus
import time
from typing import List, Dict, Optional

class WebSearcher:
    """
    Simple web search implementation using DuckDuckGo
    No API keys required!
    """
    
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
    
    def search_duckduckgo(self, query: str, max_results: int = 3) -> List[Dict]:
        """
        Search DuckDuckGo for web results
        
        Args:
            query: Search query
            max_results: Maximum number of results to return
        
        Returns:
            List of search results with title, url, and snippet
        """
        try:
            # DuckDuckGo search URL
            search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
            
            # Make the request
            response = self.session.get(search_url, timeout=10)
            response.raise_for_status()
            
            # Parse the HTML
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Find search results
            results = []
            result_elements = soup.find_all('div', class_='result')
            
            for element in result_elements[:max_results]:
                try:
                    # Extract title
                    title_element = element.find('a', class_='result__a')
                    title = title_element.get_text().strip() if title_element else "No title"
                    
                    # Extract URL
                    url = title_element.get('href') if title_element else ""
                    
                    # Extract snippet
                    snippet_element = element.find('a', class_='result__snippet')
                    snippet = snippet_element.get_text().strip() if snippet_element else "No snippet"
                    
                    if title and url:
                        results.append({
                            'title': title,
                            'url': url,
                            'snippet': snippet
                        })
                except Exception as e:
                    continue
            
            return results
            
        except Exception as e:
            print(f"Search error: {str(e)}")
            return []
    
    def get_webpage_content(self, url: str, max_length: int = 1000) -> str:
        """
        Extract text content from a webpage
        
        Args:
            url: URL to fetch
            max_length: Maximum length of content to return
        
        Returns:
            Extracted text content
        """
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()
            
            # Get text content
            text = soup.get_text()
            
            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = ' '.join(chunk for chunk in chunks if chunk)
            
            # Truncate if too long
            if len(text) > max_length:
                text = text[:max_length] + "..."
            
            return text
            
        except Exception as e:
            return f"Error fetching content: {str(e)}"

def web_search_and_retrieve(query: str, max_results: int = 2) -> List[Dict]:
    """
    Perform web search and retrieve content for RAG
    
    Args:
        query: Search query
        max_results: Maximum number of results to process
    
    Returns:
        List of documents with web content for RAG
    """
    searcher = WebSearcher()
    
    # Search for results
    search_results = searcher.search_duckduckgo(query, max_results)
    
    if not search_results:
        return []
    
    # Get content from each result
    web_documents = []
    for i, result in enumerate(search_results):
        content = searcher.get_webpage_content(result['url'])
        
        web_doc = {
            'id': f"web_{i+1}",
            'title': result['title'],
            'content': content,
            'url': result['url'],
            'snippet': result['snippet'],
            'source': 'web'
        }
        web_documents.append(web_doc)
        
        # Small delay to be respectful
        time.sleep(0.5)
    
    return web_documents

# Initialize web searcher
web_searcher = WebSearcher()
print("✅ Web search preparation module completed!")
print("📋 This module provides:")
print("   - WebSearcher class for robust web searching")
print("   - web_search_and_retrieve function for RAG integration")
print("   - Error handling and content extraction")
print("   - Ready for hybrid RAG implementation")

# Quick test to verify functionality
print(f"\n🧪 Testing web search functionality...")
test_web_query = "latest AI developments 2025"
web_results = web_search_and_retrieve(test_web_query, max_results=2)

if web_results:
    print(f"✅ Successfully retrieved {len(web_results)} web results")
    print("🔧 Web search module ready for hybrid RAG!")
else:
    print("⚠️ Web search test failed - check internet connection")

✅ Web search preparation module completed!
📋 This module provides:
   - WebSearcher class for robust web searching
   - web_search_and_retrieve function for RAG integration
   - Error handling and content extraction
   - Ready for hybrid RAG implementation

🧪 Testing web search functionality...
✅ Successfully retrieved 2 web results
🔧 Web search module ready for hybrid RAG!


In [13]:
# Hybrid RAG: Combine Local Knowledge Base + Web Search
def hybrid_retrieve_documents(query: str, local_top_k: int = 2, web_top_k: int = 2, use_web: bool = True) -> List[Dict]:
    """
    Retrieve documents from both local knowledge base and web search
    
    Args:
        query: The user's question
        local_top_k: Number of local documents to retrieve
        web_top_k: Number of web documents to retrieve
        use_web: Whether to include web search results
    
    Returns:
        Combined list of local and web documents
    """
    all_documents = []
    
    # Get local documents
    local_docs = retrieve_relevant_documents(query, local_top_k)
    for doc in local_docs:
        doc['source'] = 'local'
        all_documents.append(doc)
    
    # Get web documents if enabled
    if use_web:
        try:
            web_docs = web_search_and_retrieve(query, web_top_k)
            
            # Add embeddings for web documents to enable similarity scoring
            if web_docs:
                web_contents = [doc['content'] for doc in web_docs]
                web_embeddings = embedding_model.encode(web_contents)
                query_embedding = embedding_model.encode([query])
                
                # Calculate similarity scores
                for i, doc in enumerate(web_docs):
                    similarity = float(np.dot(query_embedding[0], web_embeddings[i]) / 
                                     (np.linalg.norm(query_embedding[0]) * np.linalg.norm(web_embeddings[i])))
                    doc['relevance_score'] = similarity
                    doc['rank'] = len(all_documents) + i + 1
                    all_documents.append(doc)
                    
        except Exception as e:
            print(f"Web search failed: {str(e)}")
    
    # Sort all documents by relevance score
    all_documents.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
    
    # Re-rank
    for i, doc in enumerate(all_documents):
        doc['rank'] = i + 1
    
    return all_documents

# Test hybrid retrieval
test_hybrid_query = "What are the latest developments in artificial intelligence?"
print(f"Testing hybrid retrieval with query: '{test_hybrid_query}'")
hybrid_results = hybrid_retrieve_documents(test_hybrid_query, local_top_k=2, web_top_k=2, use_web=True)

print(f"\nRetrieved {len(hybrid_results)} documents total:")
for doc in hybrid_results:
    source_icon = "🌐" if doc['source'] == 'web' else "📚"
    print(f"  {source_icon} {doc['title']} (Score: {doc['relevance_score']:.4f}, Source: {doc['source']})")
    if doc['source'] == 'web':
        print(f"    URL: {doc['url']}")
    print(f"    Content: {doc['content'][:100]}...")
    print()

Testing hybrid retrieval with query: 'What are the latest developments in artificial intelligence?'

Retrieved 4 documents total:
  🌐 The Top Artificial Intelligence Trends | IBM (Score: 0.7009, Source: web)
    URL: https://www.ibm.com/think/insights/artificial-intelligence-trends
    Content: The Top Artificial Intelligence Trends | IBM AI trends in 2025: What we’ve seen and what we’ll see n...

  📚 Machine Learning Basics (Score: 0.3801, Source: local)
    Content: Machine learning is a subset of artificial intelligence that enables computers to learn and improve ...

  📚 Deep Learning Overview (Score: 0.3461, Source: local)
    Content: Deep learning is a subset of machine learning that uses neural networks with multiple layers (hence ...

  🌐 Intelligente AI-Assistenten | Produkt-AI integrieren (Score: 0.0397, Source: web)
    URL: https://duckduckgo.com/y.js?ad_domain=u%2Dexperten.de&ad_provider=bingv7aa&ad_type=txad&click_metadata=IVNm_9xmhWs9jJWfUDSCj2Ems%2D1ZvQmGndhCCuKQ9yJu96

## 🔄 Hybrid RAG: Combining Local Knowledge + Web Search

Now that we have both local knowledge base retrieval and web search capabilities, let's combine them into a powerful hybrid system that can access both curated local knowledge and real-time web information.

This hybrid approach gives us:
- **Local Knowledge**: Fast, curated, domain-specific information
- **Web Search**: Current, comprehensive, global information
- **Intelligent Ranking**: Combines and ranks results from both sources
- **Flexible Control**: Can use either source independently or together

In [None]:
# Example 3: Web-Enhanced RAG Interface
from ipywidgets import Textarea, Button, Output, VBox, HBox, Checkbox, IntSlider, Tab
from IPython.display import display, HTML

print("🌐 EXAMPLE 3: WEB-ENHANCED RAG INTERFACE")
print("=" * 50)

# Create input area
web_input_box = Textarea(
    value='What are the latest developments in artificial intelligence and machine learning?',
    description='Question:',
    layout={'width': '700px', 'height': '80px'}
)

# Create checkboxes for different modes
local_rag_checkbox = Checkbox(value=True, description='Use Local Knowledge Base')
web_search_checkbox = Checkbox(value=True, description='Use Web Search')

# Create sliders for controlling retrieval
local_docs_slider = IntSlider(value=2, min=1, max=5, description='Local Docs:')
web_docs_slider = IntSlider(value=2, min=1, max=5, description='Web Docs:')

# Create buttons
web_generate_button = Button(description='🔍 Generate with Web Search', button_style='success')
web_clear_button = Button(description='Clear Output', button_style='warning')

# Create output area
web_output_area = Output()

# Arrange widgets
mode_controls = HBox([local_rag_checkbox, web_search_checkbox])
doc_controls = HBox([local_docs_slider, web_docs_slider])
button_controls = HBox([web_generate_button, web_clear_button])
web_enhanced_ui = VBox([web_input_box, mode_controls, doc_controls, button_controls, web_output_area])

display(web_enhanced_ui)

def generate_web_enhanced_response(query: str, use_local: bool = True, use_web: bool = True, 
                                 local_docs: int = 2, web_docs: int = 2) -> str:
    """
    Generate response using hybrid RAG with web search
    """
    context_parts = []
    all_sources = []
    
    if use_local or use_web:
        # Get hybrid results
        if use_local and use_web:
            relevant_docs = hybrid_retrieve_documents(query, local_docs, web_docs, use_web=True)
        elif use_local:
            relevant_docs = retrieve_relevant_documents(query, local_docs)
            for doc in relevant_docs:
                doc['source'] = 'local'
        elif use_web:
            relevant_docs = web_search_and_retrieve(query, web_docs)
            # Calculate relevance scores for web-only documents
            if relevant_docs:
                web_contents = [doc['content'] for doc in relevant_docs]
                web_embeddings = embedding_model.encode(web_contents)
                query_embedding = embedding_model.encode([query])
                
                for i, doc in enumerate(relevant_docs):
                    similarity = float(np.dot(query_embedding[0], web_embeddings[i]) / 
                                     (np.linalg.norm(query_embedding[0]) * np.linalg.norm(web_embeddings[i])))
                    doc['relevance_score'] = similarity
                    doc['source'] = 'web'
        else:
            relevant_docs = []
        
        # Build context from all sources
        for i, doc in enumerate(relevant_docs):
            source_label = "Local Knowledge" if doc['source'] == 'local' else "Web Search"
            context_parts.append(f"{source_label} {i+1}: {doc['content']}")
            all_sources.append(doc)
        
        context = "\n\n".join(context_parts)
        
        system_message = f"""You are Bernd the Bread, a cynical and philosophical bread who has become surprisingly knowledgeable about technology and current events. You maintain your dry, sardonic personality while providing informative and accurate answers.

Use the following context from multiple sources to answer the user's question:

{context}

Base your answer on the provided context from both local knowledge and web sources. Cite your sources when relevant, and add your own philosophical bread wisdom."""
    else:
        system_message = "You are Bernd the Bread. You are a cynical and philosophical bread. Your answers are short and concise."
        relevant_docs = []
    
    # Generate response
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": query}
    ]
    
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors='pt').to(model.device)
    
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=600,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
    
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    
    return response, relevant_docs

def web_enhanced_generate_response(_):
    web_output_area.clear_output()
    
    query = web_input_box.value
    use_local = local_rag_checkbox.value
    use_web = web_search_checkbox.value
    local_docs = local_docs_slider.value
    web_docs = web_docs_slider.value
    
    with web_output_area:
        # Display query info
        print("🔍 WEB-ENHANCED RAG SYSTEM")
        print("=" * 50)
        print(f"Query: {query}")
        print(f"Local Knowledge: {'✓' if use_local else '✗'}")
        print(f"Web Search: {'✓' if use_web else '✗'}")
        print(f"Local Docs: {local_docs}, Web Docs: {web_docs}")
        print("-" * 50)
        
        if use_local or use_web:
            print("🔍 Retrieving information...")
            
        try:
            response, sources = generate_web_enhanced_response(
                query, use_local, use_web, local_docs, web_docs
            )
            
            # Display sources
            if sources:
                print("\n📚 SOURCES CONSULTED:")
                local_count = sum(1 for s in sources if s['source'] == 'local')
                web_count = sum(1 for s in sources if s['source'] == 'web')
                
                print(f"  📚 Local Sources: {local_count}")
                print(f"  🌐 Web Sources: {web_count}")
                print()
                
                for i, source in enumerate(sources):
                    icon = "🌐" if source['source'] == 'web' else "📚"
                    score = source.get('relevance_score', 0.0)  # Use .get() with default
                    print(f"  {icon} {source['title']} (Score: {score:.3f})")
                    if source['source'] == 'web' and 'url' in source:
                        print(f"    URL: {source['url']}")
            
            print("\n🍞 BERND'S RESPONSE:")
            print("-" * 30)
            print(response)
            
        except Exception as e:
            print(f"❌ Error: {str(e)}")
            import traceback
            traceback.print_exc()

def web_enhanced_clear_output(_):
    web_output_area.clear_output()

# Connect button events
web_generate_button.on_click(web_enhanced_generate_response)
web_clear_button.on_click(web_enhanced_clear_output)

print("🌐 Web-Enhanced RAG System Ready!")
print("This is the most advanced interface - combining local knowledge with real-time web search!")
print("Ask questions about current events, latest developments, or any topic!")
print("The system will intelligently search both local knowledge and the web for the most relevant information.")

🌐 EXAMPLE 3: WEB-ENHANCED RAG INTERFACE


VBox(children=(Textarea(value='What are the latest developments in artificial intelligence and machine learnin…

🌐 Web-Enhanced RAG System Ready!
This is the most advanced interface - combining local knowledge with real-time web search!
Ask questions about current events, latest developments, or any topic!
The system will intelligently search both local knowledge and the web for the most relevant information.


#  Complete Guide to Progressive LLM Enhancement

This notebook demonstrates the complete evolution of LLM applications through three progressive stages:

##  **The Three-Stage Journey:**

###  **Example 1: Simple Chat Interface**
- **Location**: Cell 4
- **Purpose**: Basic chat interface with Bernd the Bread
- **Knowledge**: Only the base model's training data
- **Best for**: General conversation and creative tasks
- **Try**: "Tell me about artificial intelligence"

###  **Example 2: RAG-Enhanced Interface**
- **Location**: Cell 5  
- **Purpose**: Adds local knowledge base retrieval
- **Knowledge**: Model training data + curated documents
- **Best for**: Specific domain questions about AI, ML, ZeMA
- **Try**: "What is the difference between machine learning and deep learning?"
- **Toggle**: RAG on/off to compare responses

###  **Example 3: Web-Enhanced RAG Interface**
- **Location**: Cell 8
- **Purpose**: Hybrid RAG with real-time web search
- **Knowledge**: Model + local docs + live web results
- **Best for**: Current events, latest developments, comprehensive research
- **Try**: "What are the latest AI developments in 2024?"
- **Controls**: Toggle local/web sources, adjust document counts

---

##  **Progressive Testing Strategy:**

### **Step 1: Baseline (Example 1)**
Ask these questions in the simple interface:
- "What is machine learning?"
- "What is ZeMA?"  
- "What are the latest AI developments?"

### **Step 2: Enhanced Knowledge (Example 2)**
Ask the same questions with RAG enabled:
- Notice improved accuracy for domain-specific topics
- Toggle RAG on/off to see the difference
- Observe document retrieval and relevance scores

### **Step 3: Real-Time Information (Example 3)**
Ask the same questions with web search:
- See how current information enhances responses
- Try different combinations: local-only, web-only, hybrid
- Notice source attribution and URLs

---

##  **Key Learning Points:**

1. **Simple Model**: Fast but limited to training data
2. **RAG Enhancement**: Adds domain expertise but static knowledge
3. **Web Integration**: Provides current information but adds complexity

##  **Technical Features:**

- **No API Keys**: Everything runs locally or uses free services
- **Modular Design**: Each example builds on the previous
- **Source Attribution**: Clear indication of information sources
- **Flexible Controls**: Adjust retrieval parameters for each mode
- **Error Handling**: Graceful fallbacks if components fail

##  **Performance Comparison:**

| Feature | Example 1 | Example 2 | Example 3 |
|---------|-----------|-----------|-----------|
| Speed | ⚡ Fastest | 🔄 Medium | 🌐 Slower |
| Accuracy | 📊 Basic | 📚 Good | 🎯 Excellent |
| Currency | ❌ Static | ❌ Static | ✅ Real-time |
| Coverage | 🔒 Limited | 📖 Domain | 🌍 Global |

---

##  **Usage Instructions:**

1. **Run Setup Cells** (1-3): Load model and create knowledge base
2. **Try Example 1**: Experience basic LLM interaction
3. **Try Example 2**: See how RAG improves domain knowledge
4. **Try Example 3**: Experience the full power of web-enhanced RAG
5. **Compare Results**: Use the same queries across all three examples

This progression shows how modern LLM applications evolve from simple chat to sophisticated, knowledge-enhanced systems!