# Moshi Voice Chat Server

This notebook runs Moshi on a free T4 GPU and exposes it via ngrok.

**Instructions:**
1. Paste your ngrok auth token into `NGROK_AUTH_TOKEN` (in the ngrok setup cell), then run all cells
2. Copy the ngrok URL printed at the end
3. On your local machine: `my-agent config --set-moshi-url wss://YOUR_NGROK_URL/api/chat`
4. Start your agent server: `my-agent serve`

In [None]:
# Check GPU: print the NVIDIA driver / device status so you can confirm a GPU
# is attached to this runtime before installing anything.
!nvidia-smi

In [None]:
# Install dependencies
!pip install -U moshi torch torchaudio
!pip install pyngrok

In [None]:
# Install silero-vad (required for voice activity detection)
!pip install -q silero-vad

In [None]:
# Set up ngrok for external access
from pyngrok import ngrok
import os

# You'll need an ngrok auth token from https://dashboard.ngrok.com/get-started/your-authtoken
# Either paste it into the form field below, or leave the field empty and
# provide it through the NGROK_AUTH_TOKEN environment variable so the secret
# is never saved inside the notebook file.
NGROK_AUTH_TOKEN = ""  # @param {type:"string"}

# Form value wins; otherwise fall back to the environment. Strip stray
# whitespace/newlines that tend to sneak in when a token is copy-pasted.
NGROK_AUTH_TOKEN = (NGROK_AUTH_TOKEN or os.environ.get("NGROK_AUTH_TOKEN", "")).strip()

if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("Ngrok configured!")
else:
    print("⚠️ Set your NGROK_AUTH_TOKEN above for external access")

In [None]:
# Create a simple WebSocket wrapper for Moshi
%%writefile moshi_server.py
import asyncio
import websockets
import json
import base64
import torch
from moshi.client import MoshiClient
from moshi.utils import AUDIO_SAMPLE_RATE

class MoshiWebSocketServer:
    """WebSocket front-end that relays JSON-framed audio to a Moshi client.

    Each WebSocket message is expected to be one JSON object:

    * ``{"type": "audio", "data": <base64 string>}`` is decoded and handed to
      the client; the server answers with an optional
      ``{"type": "text", "content": ...}`` followed by an optional
      ``{"type": "audio", "data": <base64 string>}``.
    * ``{"type": "config", ...}`` is acknowledged with
      ``{"type": "status", "message": "Configured"}``.
    * Malformed input is answered with ``{"type": "error", "message": ...}``
      and the connection stays open.

    Messages with any other ``type`` are ignored.

    NOTE(review): ``MoshiClient`` and its ``process()`` method (returning an
    object with ``.audio`` and ``.text``) are used exactly as the original
    code did; confirm the installed ``moshi`` package actually provides them.
    """

    def __init__(self):
        # Most recently created client. Kept as an attribute for callers that
        # read it or call process_audio() without an explicit client.
        self.client = None

    async def handle_connection(self, websocket):
        """Serve one WebSocket connection until the peer goes away.

        A fresh ``MoshiClient`` is created for the connection and held in a
        local variable, so concurrent connections never end up processing
        audio with each other's client (``self.client`` is overwritten by
        every new connection).
        """
        print("Client connected")

        try:
            client = MoshiClient()
            self.client = client  # backward compatibility only; see __init__

            async for message in websocket:
                await self._handle_message(websocket, client, message)

        except websockets.exceptions.ConnectionClosed:
            print("Client disconnected")
        except Exception as e:
            print(f"Error: {e}")
            try:
                await self._send_error(websocket, str(e))
            except websockets.exceptions.ConnectionClosed:
                # The peer is already gone, so there is nobody left to notify.
                print("Client disconnected")

    async def _handle_message(self, websocket, client, message):
        """Parse one incoming message and send whatever replies it calls for."""
        try:
            data = json.loads(message)
        except ValueError:
            # JSONDecodeError and UnicodeDecodeError (undecodable binary
            # frames) are both ValueError subclasses.
            await self._send_error(websocket, 'Invalid JSON')
            return

        if not isinstance(data, dict):
            # Valid JSON, but e.g. a list or a number: there is no 'type' to read.
            await self._send_error(websocket, 'Expected a JSON object')
            return

        message_type = data.get('type')

        if message_type == 'audio':
            try:
                audio_bytes = base64.b64decode(data['data'])
            except (KeyError, TypeError, ValueError):
                # Missing 'data', a non-string payload, or broken base64.
                await self._send_error(websocket, 'Invalid audio payload')
                return

            response_audio, response_text = await self.process_audio(audio_bytes, client)

            if response_text:
                await websocket.send(json.dumps({
                    'type': 'text',
                    'content': response_text
                }))

            if response_audio:
                await websocket.send(json.dumps({
                    'type': 'audio',
                    'data': base64.b64encode(response_audio).decode()
                }))

        elif message_type == 'config':
            await websocket.send(json.dumps({
                'type': 'status',
                'message': 'Configured'
            }))

    async def _send_error(self, websocket, message):
        """Send a ``{"type": "error"}`` frame carrying ``message``."""
        await websocket.send(json.dumps({
            'type': 'error',
            'message': message
        }))

    async def process_audio(self, audio_bytes, client=None):
        """Run ``audio_bytes`` through a Moshi client.

        Args:
            audio_bytes: Raw audio payload, already base64-decoded.
            client: Client to use. Defaults to ``self.client`` so existing
                callers that pass only ``audio_bytes`` keep working.

        Returns:
            ``(audio, text)`` taken from the client's response, or
            ``(None, "Error: ...")`` if processing raised.
        """
        active_client = self.client if client is None else client
        try:
            # process() is a synchronous call; run it in the default executor
            # so the event loop keeps servicing other connections meanwhile.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, active_client.process, audio_bytes
            )
            return response.audio, response.text
        except Exception as e:
            print(f"Processing error: {e}")
            return None, f"Error: {e}"

async def main(host="0.0.0.0", port=8080):
    """Run the Moshi WebSocket server until the task is cancelled.

    Args:
        host: Interface to bind. The default listens on all interfaces.
        port: TCP port to listen on. The default matches the port the
            notebook's ngrok tunnel forwards to.
    """
    server = MoshiWebSocketServer()
    async with websockets.serve(server.handle_connection, host, port):
        print(f"Moshi WebSocket server running on ws://{host}:{port}")
        # A Future that is never resolved: parks this coroutine so the server
        # context stays open.
        await asyncio.Future()  # Run forever

# Script entry point: when moshi_server.py is executed directly, run the
# server coroutine on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())

In [None]:
# Alternative: Use the built-in Moshi Gradio interface with tunnel
import subprocess
import threading
import time

# Start Moshi Gradio server in background
def run_moshi():
    """Launch the built-in Moshi server with a Gradio tunnel via the shell."""
    # Explicit spelling of the `!python ...` shell escape.
    launch_command = "python -m moshi.server --gradio-tunnel --hf-repo kyutai/moshika-pytorch-bf16"
    get_ipython().system(launch_command)

# Run in a daemon thread so this cell returns immediately.
# Re-running the cell must not launch a second server (and a second model
# load), so reuse the existing thread if it is still alive.
_running_thread = globals().get("moshi_thread")
if _running_thread is not None and _running_thread.is_alive():
    print("Moshi server is already starting/running; not launching a second copy.")
else:
    moshi_thread = threading.Thread(target=run_moshi, daemon=True)
    moshi_thread.start()

    print("Starting Moshi server... (this takes 2-3 minutes to load the model)")
    print("The Gradio tunnel URL will appear above when ready.")

---

## Alternative: Direct WebSocket with Ngrok

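Run this if you want to connect via WebSocket instead of Gradio. The tunnel forwards to port 8080, so `moshi_server.py` (written by the cell above) must already be running there; start it first, e.g. with `!nohup python moshi_server.py > moshi_server.log 2>&1 &` in its own cell: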

In [None]:
# Start ngrok tunnel to the local WebSocket server on port 8080
from urllib.parse import urlparse

public_url = ngrok.connect(8080, "http")
# The tunnel object exposes its address as `.public_url` (a full URL string);
# it has no `.public_host` attribute, so pull the host out of the URL.
public_host = urlparse(public_url.public_url).netloc
print(f"\n{'='*60}")
print(f"Moshi WebSocket URL: wss://{public_host}/api/chat")
print(f"{'='*60}\n")
print("Copy the URL above and run on your machine:")
print("  my-agent config --set-moshi-url wss://YOUR_URL/api/chat")

In [None]:
# Keep the notebook alive: this cell blocks forever and prints a heartbeat once
# a minute. Interrupt the kernel to stop it.
import time

try:
    while True:
        time.sleep(60)
        print("Server still running...")
except KeyboardInterrupt:
    # Interrupting is the only way out of the loop; leave without a traceback.
    print("Keep-alive loop stopped.")