# Moshi Voice Server on Kaggle (Free GPU) + Tailscale

This runs Moshi on Kaggle's free T4 GPU and exposes it via Tailscale.

**Requirements:**
- Kaggle account (free)
- Tailscale account (free) - get auth key from https://login.tailscale.com/admin/settings/keys

**Steps:**
1. Enable GPU: Settings → Accelerator → GPU T4 x2 (GPU P100 also works)
2. Set your Tailscale auth key below
3. Run all cells
4. Point `my-agent` on your local machine at the notebook's Tailscale IP, then connect from your phone (see "Connection Instructions" at the end)

In [None]:
# Check GPU is enabled
# If nvidia-smi fails (or is not found), the `||` branch prints the reminder
# below instead of a GPU table.
!nvidia-smi || echo "ERROR: Enable GPU in Settings → Accelerator → GPU P100 or T4"

In [None]:
# Configuration - SET YOUR AUTH KEY HERE
import os

TAILSCALE_AUTH_KEY = ""  # @param {type:"string"}

# Get your auth key from: https://login.tailscale.com/admin/settings/keys
# Click "Generate auth key" and paste it above
#
# If the field above is left empty, fall back to the TAILSCALE_AUTH_KEY
# environment variable so the secret does not have to be saved in the
# notebook source. Whitespace picked up while pasting is stripped because the
# key is later passed on a command line.
TAILSCALE_AUTH_KEY = (
    TAILSCALE_AUTH_KEY or os.environ.get("TAILSCALE_AUTH_KEY", "")
).strip()

In [None]:
# Install Tailscale (userspace mode - no root needed)
!curl -fsSL https://tailscale.com/install.sh | sh

# Run tailscaled in userspace mode (background)
import subprocess
import os

# Start tailscaled in userspace mode
tailscaled = subprocess.Popen(
    ['tailscaled', '--tun=userspace-networking', '--socks5-server=localhost:1055'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)
print("Tailscaled starting...")

In [None]:
# Connect to Tailscale
import subprocess
import time

time.sleep(2)  # Wait for tailscaled to start

if TAILSCALE_AUTH_KEY:
    # Arguments are passed as a list (no shell), so the key is never
    # interpreted by a shell, and the exit status is checked instead of
    # assuming the login succeeded.
    try:
        up = subprocess.run(
            [
                'tailscale', 'up',
                f'--authkey={TAILSCALE_AUTH_KEY}',
                '--hostname=kaggle-moshi',
                '--accept-routes',
            ],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        print("ERROR: 'tailscale' command not found - run the install cell first.")
    else:
        # Output is captured and re-printed so it shows up in the notebook.
        output = (up.stdout + up.stderr).strip()
        if output:
            print(output)
        if up.returncode == 0:
            print("Connected to Tailscale!")
        else:
            print(f"ERROR: 'tailscale up' failed with exit code {up.returncode}")
            print("Check that the auth key is valid and that tailscaled is running.")
else:
    print("ERROR: Set your TAILSCALE_AUTH_KEY above!")
    print("Get it from: https://login.tailscale.com/admin/settings/keys")

In [None]:
# Get our Tailscale IP
result = !tailscale ip -4
TAILSCALE_IP = result[0] if result else "Not connected"
print(f"Tailscale IP: {TAILSCALE_IP}")
print(f"\nYour phone can connect to: http://{TAILSCALE_IP}:8080")

In [None]:
# Install Moshi and dependencies
# -q keeps pip's output short.
# NOTE(review): the message below is printed unconditionally, even if pip
# reported an error - check the cell output before continuing.
!pip install -q moshi torch torchaudio silero-vad websockets
print("Moshi installed!")

In [None]:
# Create Moshi WebSocket server
%%writefile moshi_ws_server.py
import asyncio
import websockets
import json
import base64
import numpy as np
import torch
from pathlib import Path

# Moshi imports
try:
    from moshi.models import loaders
    from moshi.client import MoshiClient
    from moshi.utils import AUDIO_SAMPLE_RATE
    MOSHI_AVAILABLE = True
except ImportError:
    MOSHI_AVAILABLE = False
    print("Warning: Moshi not fully available, using fallback")

class MoshiWebSocketServer:
    """WebSocket front-end that hands base64-encoded audio to a Moshi client.

    Messages are JSON text frames:

    * ``{"type": "audio", "data": <base64>}`` is answered with a ``status``
      "Processing..." frame, an optional ``text`` frame, an optional
      ``audio`` frame (base64) and a final ``status`` "done" frame.
    * ``{"type": "config", "voice": <name>}`` is answered with a ``status``
      acknowledgement. The voice is only echoed back; it is not stored.
    * Malformed frames are answered with an ``error`` frame and the
      connection stays open.

    Any other message type is ignored.
    """

    def __init__(self, port=8080):
        """Remember the port; the model is loaded later by ``init_model``."""
        self.port = port
        self.model = None   # not assigned anywhere else in this class
        self.client = None  # set by init_model() on success

    async def init_model(self) -> bool:
        """Create the Moshi client.

        Returns:
            True when the client was created, False when Moshi could not be
            imported or creating the client raised.
        """
        if not MOSHI_AVAILABLE:
            return False

        print("Loading Moshi model (this takes 1-2 minutes)...")
        try:
            # Make sure the cache directory exists. The path is not passed to
            # the client below.
            model_path = Path.home() / ".cache" / "moshi"
            model_path.mkdir(parents=True, exist_ok=True)

            # Use Moshi client which handles model loading
            self.client = MoshiClient()
            print("Moshi model loaded!")
            return True
        except Exception as e:
            print(f"Error loading model: {e}")
            return False

    @staticmethod
    async def _send_json(websocket, payload) -> None:
        """Serialize *payload* to JSON and send it as one frame."""
        await websocket.send(json.dumps(payload))

    async def handle_connection(self, websocket) -> None:
        """Serve one client until it disconnects.

        Each incoming frame is handled independently, so a malformed frame
        produces an ``error`` reply without ending the session.
        """
        print(f"Client connected from {websocket.remote_address}")

        try:
            async for message in websocket:
                await self._handle_message(websocket, message)
        except websockets.exceptions.ConnectionClosed:
            print("Client disconnected")
        except Exception as e:
            print(f"Connection error: {e}")
            # Best effort: the socket may already be unusable. `Exception`
            # (not a bare except) so task cancellation is not swallowed.
            try:
                await self._send_json(websocket, {
                    'type': 'error',
                    'message': str(e)
                })
            except Exception as send_error:
                print(f"Could not report error to client: {send_error}")

    async def _handle_message(self, websocket, message) -> None:
        """Parse one frame and dispatch on its ``type`` field."""
        try:
            data = json.loads(message)
        except ValueError:
            # Covers json.JSONDecodeError and undecodable binary frames.
            await self._send_json(websocket, {
                'type': 'error',
                'message': 'Invalid JSON format'
            })
            return

        # Valid JSON that is not an object (e.g. a list or number) has no
        # .get(); reject it instead of letting the connection die.
        if not isinstance(data, dict):
            await self._send_json(websocket, {
                'type': 'error',
                'message': 'Message must be a JSON object'
            })
            return

        message_type = data.get('type')
        if message_type == 'audio':
            await self._handle_audio(websocket, data)
        elif message_type == 'config':
            voice = data.get('voice', 'NATF2')
            await self._send_json(websocket, {
                'type': 'status',
                'message': f'Configured with voice: {voice}'
            })

    async def _handle_audio(self, websocket, data) -> None:
        """Decode an ``audio`` message, run it through Moshi, send the replies."""
        try:
            # binascii.Error (bad padding) is a ValueError; a non-string
            # payload raises TypeError.
            audio_bytes = base64.b64decode(data.get('data', ''))
        except (ValueError, TypeError):
            await self._send_json(websocket, {
                'type': 'error',
                'message': 'Invalid base64 audio data'
            })
            return

        await self._send_json(websocket, {
            'type': 'status',
            'message': 'Processing...'
        })

        # NOTE(review): process_audio calls the client synchronously, so the
        # event loop is blocked while it runs - confirm this is acceptable.
        text_response, audio_response = await self.process_audio(audio_bytes)

        if text_response:
            await self._send_json(websocket, {
                'type': 'text',
                'content': text_response
            })

        # NOTE(review): assumes the audio result is bytes-like - confirm.
        if audio_response is not None and len(audio_response) > 0:
            await self._send_json(websocket, {
                'type': 'audio',
                'data': base64.b64encode(audio_response).decode()
            })

        # Signal done
        await self._send_json(websocket, {
            'type': 'status',
            'message': 'done'
        })

    async def process_audio(self, audio_bytes: bytes) -> tuple:
        """Run *audio_bytes* through the Moshi client.

        Returns:
            A ``(text, audio)`` pair. When no client is loaded, or processing
            raises, ``text`` is an explanatory message and ``audio`` is None.
        """
        try:
            if self.client:
                # Use actual Moshi processing
                result = self.client.process(audio_bytes)
                return result.text, result.audio
            else:
                # Fallback: explain that the model is unavailable
                return "Moshi model not loaded. Please wait and try again.", None

        except Exception as e:
            print(f"Processing error: {e}")
            return f"Error processing audio: {e}", None

async def main(port=8080):
    """Load the model, print connection hints and serve until interrupted.

    Args:
        port: TCP port to listen on (all interfaces). Defaults to 8080.
    """
    server = MoshiWebSocketServer(port=port)

    # Initialize model. The server is started either way; without a model,
    # audio requests are answered with a fallback text message.
    model_loaded = await server.init_model()
    if not model_loaded:
        print("Warning: Moshi model is not loaded; audio requests will only "
              "receive a fallback text reply.")

    banner = '=' * 60
    print(f"\n{banner}")
    print("Moshi WebSocket Server Running!")
    print(banner)
    print(f"\nWebSocket URL: ws://0.0.0.0:{server.port}")
    print("\nOn your machine, run:")
    # TAILSCALE_IP is a placeholder: this script does not know the address.
    print(f"  my-agent config --set-moshi-url ws://TAILSCALE_IP:{server.port}")
    print(f"{banner}\n")

    async with websockets.serve(server.handle_connection, "0.0.0.0", server.port):
        await asyncio.Future()  # Never completes: run until the process is stopped


if __name__ == "__main__":
    asyncio.run(main())

In [None]:
# Run Moshi server
# The script serves until it is stopped, so this cell keeps running for as
# long as the server is up; interrupt the cell to shut the server down.
!python moshi_ws_server.py

---

## Connection Instructions

Once the server is running, note the **Tailscale IP** printed by the "Get our Tailscale IP" cell above.

On your local machine (replace `TAILSCALE_IP` with the notebook's Tailscale IP):
```bash
my-agent config --set-moshi-url ws://TAILSCALE_IP:8080
my-agent serve
```

On your phone (which must be connected to the same Tailscale network), open: `http://YOUR_LOCAL_MACHINE_TAILSCALE_IP:3000`