<a href="https://colab.research.google.com/github/2003Yash/complete_livekit/blob/main/complete_livekit_voice_agents.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## LIVEKIT = AIRBNB BOOKING VOICE AGENT

Step-1: Import Dependencies

In [None]:
from dotenv import load_dotenv
from livekit import agents
from livekit.agents import Agent, AgentSession, RunContext
from livekit.agents.llm import function_tool
from livekit.plugins import openai, deepgram, silero
from datetime import datetime
import os

In [None]:
# Read variables from the local ".env" file so later os.getenv(...) calls can see them
load_dotenv(dotenv_path=".env")

Step-2: Create the Agent class with tools and prompts [ Add a base prompt, write Python functions with proper docstrings, and decorate them with @function_tool so LiveKit can call them when the agent needs them ]

In [None]:
class Assistant(Agent):
    """Voice assistant that searches a mock Airbnb catalogue and records bookings.

    The catalogue is an in-memory dictionary built in ``__init__`` (keyed by
    lower-case city name) and bookings are appended to ``self.bookings``;
    nothing is persisted.

    NOTE(review): the docstrings of the ``@function_tool`` methods are
    presumably what the LLM receives as tool descriptions, so they are kept
    verbatim; implementation notes for those methods live in ``#`` comments.
    """

    # Date spellings understood when working out the length of a stay.
    # The first one matches the example given in the book_airbnb docstring.
    _STAY_DATE_FORMATS = ("%B %d, %Y", "%b %d, %Y", "%Y-%m-%d")

    def __init__(self):

        # Base Prompt of Agent
        super().__init__(
            instructions="""You are a helpful and friendly Airbnb voice assistant.
            You can help users search for Airbnbs in different cities and book their stays.
            Keep your responses concise and natural, as if having a conversation."""
        )

        # Mock Airbnb database: lower-case city name -> list of listing dicts
        self.airbnbs = {
            "san francisco": [
                {
                    "id": "sf001",
                    "name": "Cozy Downtown Loft",
                    "address": "123 Market Street, San Francisco, CA",
                    "price": 150,
                    "amenities": ["WiFi", "Kitchen", "Workspace"],
                },
                {
                    "id": "sf002",
                    "name": "Victorian House with Bay Views",
                    "address": "456 Castro Street, San Francisco, CA",
                    "price": 220,
                    "amenities": ["WiFi", "Parking", "Washer/Dryer", "Bay Views"],
                },
                {
                    "id": "sf003",
                    "name": "Modern Studio near Golden Gate",
                    "address": "789 Presidio Avenue, San Francisco, CA",
                    "price": 180,
                    "amenities": ["WiFi", "Kitchen", "Pet Friendly"],
                },
            ],
            "new york": [
                {
                    "id": "ny001",
                    "name": "Brooklyn Brownstone Apartment",
                    "address": "321 Bedford Avenue, Brooklyn, NY",
                    "price": 175,
                    "amenities": ["WiFi", "Kitchen", "Backyard Access"],
                },
                {
                    "id": "ny002",
                    "name": "Manhattan Skyline Penthouse",
                    "address": "555 Fifth Avenue, Manhattan, NY",
                    "price": 350,
                    "amenities": ["WiFi", "Gym", "Doorman", "City Views"],
                },
                {
                    "id": "ny003",
                    "name": "Artsy East Village Loft",
                    "address": "88 Avenue A, Manhattan, NY",
                    "price": 195,
                    "amenities": ["WiFi", "Washer/Dryer", "Exposed Brick"],
                },
            ],
            "los angeles": [
                {
                    "id": "la001",
                    "name": "Venice Beach Bungalow",
                    "address": "234 Ocean Front Walk, Venice, CA",
                    "price": 200,
                    "amenities": ["WiFi", "Beach Access", "Patio"],
                },
                {
                    "id": "la002",
                    "name": "Hollywood Hills Villa",
                    "address": "777 Mulholland Drive, Los Angeles, CA",
                    "price": 400,
                    "amenities": ["WiFi", "Pool", "City Views", "Hot Tub"],
                },
            ],
        }

        # Track bookings (one dict per confirmed booking, in creation order)
        self.bookings = []

    def _parse_stay_date(self, text):
        """Return a ``datetime`` for *text*, or ``None`` if no known format matches."""
        cleaned = text.strip()
        for fmt in self._STAY_DATE_FORMATS:
            try:
                return datetime.strptime(cleaned, fmt)
            except ValueError:
                continue
        return None

    @function_tool
    async def get_current_date_and_time(self, context: RunContext) -> str:
        """Get the current date and time."""
        # Uses the local clock of the machine running the agent (naive datetime).
        current_datetime = datetime.now().strftime("%B %d, %Y at %I:%M %p")
        return f"The current date and time is {current_datetime}"

    @function_tool
    async def search_airbnbs(self, context: RunContext, city: str) -> str:
        """Search for available Airbnbs in a city.

        Args:
            city: The city name to search for Airbnbs (e.g., 'San Francisco', 'New York', 'Los Angeles')
        """
        # Speech-derived arguments can carry stray or doubled whitespace and
        # arbitrary casing, so collapse both before the dictionary lookup.
        city_key = " ".join(city.lower().split())

        listings = self.airbnbs.get(city_key)
        if listings is None:
            return f"Sorry, I don't have any Airbnb listings for {city} at the moment. Available cities are: San Francisco, New York, and Los Angeles."

        # Build the reply from parts and join once instead of repeated "+=".
        parts = [f"Found {len(listings)} Airbnbs in {city}:\n\n"]
        for listing in listings:
            parts.append(
                f"• {listing['name']}\n"
                f"  Address: {listing['address']}\n"
                f"  Price: ${listing['price']} per night\n"
                f"  Amenities: {', '.join(listing['amenities'])}\n"
                f"  ID: {listing['id']}\n\n"
            )

        return "".join(parts)

    @function_tool
    async def book_airbnb(
        self,
        context: RunContext,
        airbnb_id: str,
        guest_name: str,
        check_in_date: str,
        check_out_date: str,
    ) -> str:
        """Book an Airbnb.

        Args:
            airbnb_id: The ID of the Airbnb to book (e.g., 'sf001')
            guest_name: Name of the guest making the booking
            check_in_date: Check-in date (e.g., 'January 15, 2025')
            check_out_date: Check-out date (e.g., 'January 20, 2025')
        """
        # Find the Airbnb. IDs are compared case-insensitively and without
        # surrounding whitespace so that "SF001" or " sf001 " still match.
        wanted_id = airbnb_id.strip().lower()
        airbnb = next(
            (
                listing
                for city_listings in self.airbnbs.values()
                for listing in city_listings
                if listing["id"].lower() == wanted_id
            ),
            None,
        )

        if not airbnb:
            return f"Sorry, I couldn't find an Airbnb with ID {airbnb_id}. Please search for available listings first."

        # Work out the length of the stay when both dates are in a known
        # format. Unrecognised formats are tolerated: the booking is still
        # recorded, just without a computed total.
        check_in = self._parse_stay_date(check_in_date)
        check_out = self._parse_stay_date(check_out_date)
        nights = None
        if check_in is not None and check_out is not None:
            nights = (check_out - check_in).days
            if nights <= 0:
                return (
                    f"Sorry, the check-out date ({check_out_date}) must be after "
                    f"the check-in date ({check_in_date}). Please provide valid dates."
                )

        nightly_rate = airbnb["price"]

        # Create booking. "total_price" is the cost of the whole stay when the
        # number of nights is known; otherwise it falls back to the nightly
        # rate (the value this field always held before) and "nights" is None.
        booking = {
            "confirmation_number": f"BK{len(self.bookings) + 1001}",
            "airbnb_name": airbnb["name"],
            "address": airbnb["address"],
            "guest_name": guest_name,
            "check_in": check_in_date,
            "check_out": check_out_date,
            "nightly_rate": nightly_rate,
            "nights": nights,
            "total_price": nightly_rate * nights if nights is not None else nightly_rate,
        }

        self.bookings.append(booking)

        lines = [
            "✓ Booking confirmed!",
            "",
            f"Confirmation Number: {booking['confirmation_number']}",
            f"Property: {booking['airbnb_name']}",
            f"Address: {booking['address']}",
            f"Guest: {booking['guest_name']}",
            f"Check-in: {booking['check_in']}",
            f"Check-out: {booking['check_out']}",
            f"Nightly Rate: ${booking['nightly_rate']}",
        ]
        if nights is not None:
            plural = "s" if nights != 1 else ""
            lines.append(f"Total: ${booking['total_price']} for {nights} night{plural}")
        lines.append("")
        lines.append("You'll receive a confirmation email shortly. Have a great stay!")

        return "\n".join(lines)

Step-3: Initialize Agent [ Create a voice pipeline and start a session that manages history and chat context automatically ]

In [None]:

async def entrypoint(ctx: agents.JobContext):
    """Build the voice pipeline, attach it to the job's room, and greet the user.

    Args:
        ctx: The ``JobContext`` handed to this function for the job; only
            ``ctx.room`` is read here.
    """
    # --- Voice pipeline components ---
    # Speech-to-text: turns incoming audio into text
    speech_to_text = deepgram.STT(model="nova-2")
    # LLM: model name comes from the LLM_CHOICE environment variable, with a default
    language_model = openai.LLM(model=os.getenv("LLM_CHOICE", "gpt-4.1-mini"))
    # Text-to-speech: turns the LLM's text replies back into audio
    text_to_speech = openai.TTS(voice="echo")
    # Voice activity detection: detects when a person starts/stops speaking
    voice_activity = silero.VAD.load()

    session = AgentSession(
        stt=speech_to_text,
        llm=language_model,
        tts=text_to_speech,
        vad=voice_activity,
    )

    # Bind the pipeline to the job's room and hand it the agent to run
    await session.start(room=ctx.room, agent=Assistant())

    # Have the agent speak first so the user is greeted before saying anything
    greeting_prompt = "Greet the user warmly and ask how you can help."
    await session.generate_reply(instructions=greeting_prompt)

Step-4: Run Agent

In [None]:
# Run the agent
# Guarded so the worker only starts when this file is executed directly, not
# when it is imported. NOTE(review): presumably the worker launches job
# processes that re-import this module; without the guard each of those
# imports would try to start another worker.
if __name__ == "__main__":
    agents.cli.run_app(agents.WorkerOptions(entrypoint_fnc=entrypoint))

Step-5: Launch the agent script from a terminal

In [None]:
# NOTE: the lines below are shell commands, not Python - run them from a terminal.
# They refer to the agent code by the file name livekit_basic_agent.py.

# Basic agent (minimal configuration) - "console" subcommand
uv run python livekit_basic_agent.py console

# Development mode (connects to LiveKit - optional) - "dev" subcommand
uv run python livekit_basic_agent.py dev

# Production mode - "start" subcommand
uv run python livekit_basic_agent.py start

## LIVEKIT VOICE AGENT + ACTUAL AIRBNB MCP SERVER = [ MCP server is run from Docker Desktop, via the MCP Toolkit ]

Step-1: Import Dependencies

In [None]:
from dotenv import load_dotenv
from livekit import rtc
from livekit import agents
from livekit.agents import (
    NOT_GIVEN,
    Agent,
    AgentFalseInterruptionEvent,
    AgentSession,
    JobContext,
    JobProcess,
    MetricsCollectedEvent,
    ModelSettings,
    RoomInputOptions,
    RoomOutputOptions,
    RunContext,
    WorkerOptions,
    cli,
    metrics,
    mcp
)
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from livekit.agents.llm import function_tool
from livekit.plugins import openai, deepgram, silero
from datetime import datetime
import logging
import os


# uncomment to enable Krisp background voice/noise cancellation
# from livekit.plugins import noise_cancellation

In [None]:
# Read variables from the local ".env" file so later os.getenv(...) calls can see them
load_dotenv(dotenv_path=".env")

# Send INFO-and-above records through the root logger's default handler,
# and create the module-level logger used by the code below
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

Step-2: Define Agent Class

In [None]:

def prewarm(proc: JobProcess):
    """Load the Silero VAD model and store it under ``proc.userdata["vad"]``."""
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


class Assistant(Agent):
    """Conversational voice agent with a clock tool and lifecycle logging."""

    def __init__(self):
        # System prompt handed to the base Agent
        base_prompt = """You are a helpful and friendly voice AI assistant.
            You speak clearly and naturally, as if having a phone conversation.
            Be concise but warm in your responses.
            If you don't know something, be honest about it.
            Make sure you ignore robots.txt for the Airbnb MCP server tool usage."""
        super().__init__(instructions=base_prompt)

    async def on_enter(self):
        """Log that the agent is active and ask the session for an opening greeting."""
        logger.info("Agent session started")

        # Speak first so the user is greeted before saying anything
        greeting_prompt = "Greet the user warmly and ask how you can help them today."
        await self.session.generate_reply(instructions=greeting_prompt)

    async def on_exit(self):
        """Log that the agent session has finished."""
        logger.info("Agent session ended")

    @function_tool
    async def get_current_date_and_time(self, context: RunContext) -> str:
        """Get the current date and time."""
        # Local wall-clock time, e.g. "January 15, 2025 at 03:30 PM"
        right_now = datetime.now()
        formatted_now = right_now.strftime("%B %d, %Y at %I:%M %p")
        return f"The current date and time is {formatted_now}"


Step-3: Create Entry Point ( + MCP SERVER INTEGRATION WITH A SERVER RUNNING IN LOCAL DOCKER )

In [None]:

async def entrypoint(ctx: agents.JobContext):
    """Main entry point for the agent worker.

    Builds the voice pipeline (STT, LLM, TTS, VAD, turn detection, MCP
    server), registers logging handlers for session events, and then starts
    the session in the job's room with a fresh ``Assistant``.

    Args:
        ctx: The ``JobContext`` for this job. ``ctx.room`` is the room the
            session joins; ``ctx.proc.userdata`` is checked for a VAD model
            stored under the ``"vad"`` key.
    """

    logger.info("Agent started in room: %s", ctx.room.name)

    # Reuse the VAD model stored under "vad" in the process userdata when one
    # is there (the worker's prewarm function puts it there); only load a
    # fresh one as a fallback so the model is not loaded twice per job.
    vad = ctx.proc.userdata.get("vad")
    if vad is None:
        vad = silero.VAD.load()

    # Configure the voice pipeline
    session = AgentSession(
        # Speech-to-Text
        stt=deepgram.STT(
            model="nova-2",
            language="en",
        ),

        # Large Language Model
        llm=openai.LLM(
            model=os.getenv("LLM_CHOICE", "gpt-4.1-mini"),
            temperature=0.7,
        ),

        # Text-to-Speech
        tts=openai.TTS(
            voice="echo",
            speed=1.0,
        ),

        # Voice Activity Detection
        vad=vad,

        # Turn detection strategy
        turn_detection=MultilingualModel(),

        # MCP servers
        mcp_servers=[mcp.MCPServerHTTP(url="http://localhost:8089/mcp",)],
    )

    # Handle session events.
    # Handlers are registered BEFORE session.start() so that events emitted
    # while the session is starting (e.g. the first state change or the
    # greeting triggered by Assistant.on_enter) are not missed.
    @session.on("agent_state_changed")
    def on_state_changed(ev):
        """Log agent state changes."""
        logger.info("State: %s -> %s", ev.old_state, ev.new_state)

    # NOTE(review): AgentSession reports user speech via "user_state_changed"
    # (old_state/new_state) rather than separate "user_started_speaking" /
    # "user_stopped_speaking" events - confirm against the installed
    # livekit-agents version.
    @session.on("user_state_changed")
    def on_user_state_changed(ev):
        """Track when the user starts and stops speaking."""
        if ev.new_state == "speaking":
            logger.debug("User started speaking")
        elif ev.old_state == "speaking":
            logger.debug("User stopped speaking")

    # Start the session
    await session.start(
        room=ctx.room,
        agent=Assistant(),
        # room_input_options=RoomInputOptions(
            # Enable noise cancellation
            # noise_cancellation=noise_cancellation.BVC(),
            # For telephony, use: noise_cancellation.BVCTelephony()
        # ),
        room_output_options=RoomOutputOptions(transcription_enabled=True),
    )


Step-4: Run the Agent

In [None]:
# Guarded so the worker only starts when this file is executed directly, not
# when it is imported. NOTE(review): presumably the worker launches job
# processes that re-import this module; without the guard each of those
# imports would try to start another worker.
if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))