# 🚀 Trellis NIM Deployment Notebook

This notebook provides a complete deployment and management interface for the Trellis NIM 3D Model Generation Application.

## Features
- ✅ System prerequisites checking
- 🔧 Environment configuration
- 🐳 Docker Compose deployment
- 📊 Health monitoring
- 📝 Log viewing
- 🔄 Service management (start/stop/restart)

## Prerequisites
- Linux VM with NVIDIA GPU
- Docker and Docker Compose installed
- NVIDIA Docker runtime configured
- Node.js and npm installed (the Project Setup step runs `npm install` and `npm run build`)
- NVIDIA API Key from https://build.nvidia.com/microsoft/trellis


## 1. Setup and Configuration

Install required Python packages and import dependencies.


In [None]:
# Install required packages
%pip install -q python-dotenv requests tabulate psutil ipywidgets


In [None]:
import os
import sys
import subprocess
import time
import json
import requests
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv, set_key
from tabulate import tabulate
import psutil
from typing import Dict, List, Tuple

# Set project root directory.
# When the kernel was started inside a "notebooks/" sub-directory the project
# lives one level up; otherwise the current working directory is the project.
_start_dir = Path.cwd()
PROJECT_ROOT = _start_dir.parent if _start_dir.name == 'notebooks' else _start_dir

# Switch the process into the project root so relative paths used by later
# cells resolve against it.
os.chdir(PROJECT_ROOT)

print(f"📂 Project Root: {PROJECT_ROOT}")
print("✅ Setup complete!")


In [None]:
def run_command(cmd: str, shell: bool = True, capture: bool = True) -> Tuple[int, str, str]:
    """Run *cmd* with the project root as working directory.

    Args:
        cmd: Command to execute (a shell command line when ``shell`` is True).
        shell: Forwarded to ``subprocess.run`` as ``shell``.
        capture: When True, stdout and stderr are collected as text and
            returned; when False the child inherits the parent's streams and
            two empty strings are returned in their place.

    Returns:
        ``(exit_code, stdout, stderr)``.  If launching the command raises,
        the result is ``(1, "", <exception message>)`` rather than an error.
    """
    try:
        # Build the keyword arguments inside the ``try`` so that any failure
        # while preparing the call is reported the same way as a failed launch.
        run_kwargs = {"shell": shell, "cwd": PROJECT_ROOT}
        if capture:
            run_kwargs["capture_output"] = True
            run_kwargs["text"] = True
        completed = subprocess.run(cmd, **run_kwargs)
    except Exception as exc:
        return 1, "", str(exc)

    if not capture:
        return completed.returncode, "", ""
    return completed.returncode, completed.stdout, completed.stderr

def print_status(message: str, status: str = "info"):
    """Print *message* prefixed with a colored status glyph.

    Args:
        message: Text to display.
        status: One of ``"success"``, ``"error"``, ``"warning"`` or
            ``"info"``.  Any other value is rendered like ``"info"``.
    """
    # ANSI color escape followed by the glyph for each status.  The glyphs
    # are real Unicode symbols (previously stored as mis-decoded bytes).
    prefixes = {
        "success": "\033[92m✓",
        "error": "\033[91m✗",
        "warning": "\033[93m⚠",
        "info": "\033[94mℹ",
    }
    reset = "\033[0m"  # restores the terminal's default color
    print(f"{prefixes.get(status, prefixes['info'])} {message}{reset}")

def check_command_exists(cmd: str) -> bool:
    """Return True when *cmd* resolves to an executable on ``PATH``.

    The lookup is done in-process with ``shutil.which`` instead of shelling
    out to the external ``which`` program, so it works on hosts that do not
    ship ``which`` and never passes *cmd* through a shell.

    Args:
        cmd: Name of a single executable (e.g. ``"docker"``).  A string
            containing spaces such as ``"docker compose"`` is looked up as one
            name and therefore is not found; sub-commands must be probed by
            running them.

    Returns:
        True if an executable with that name was found, False otherwise
        (including for an empty name).
    """
    import shutil  # local import keeps this cell self-contained

    return shutil.which(cmd) is not None

print("‚úÖ Utility functions loaded")


## 2. System Prerequisites Check

Check all system requirements before deployment.


In [None]:
def check_prerequisites() -> Dict[str, bool]:
    """Probe the host for everything the deployment needs and print a report.

    Checks, in order: the Docker CLI, Docker Compose (the standalone
    ``docker-compose`` binary or the ``docker compose`` plugin), an NVIDIA GPU
    via ``nvidia-smi``, free disk space on ``/`` (more than 50 GB) and total
    RAM (more than 16 GB).  Docker, Docker Compose and the GPU are critical;
    disk and RAM shortfalls only produce warnings.

    Returns:
        Mapping with the keys ``docker``, ``docker-compose``, ``gpu``,
        ``disk_space`` and ``ram``; each value is True when that check passed.
    """
    min_free_disk_gb = 50
    min_ram_gb = 16
    bytes_per_gb = 1024**3
    checks = {}

    # Check Docker
    checks['docker'] = check_command_exists('docker')
    if checks['docker']:
        code, stdout, _ = run_command('docker --version')
        if code == 0:
            print_status(f"Docker: {stdout.strip()}", "success")
        else:
            print_status("Docker found, but 'docker --version' failed", "warning")
    else:
        print_status("Docker not found", "error")

    # Check Docker Compose.  The v2 plugin is a sub-command of ``docker``, not
    # an executable of its own, so a PATH lookup cannot detect it; run each
    # variant's version command and accept the first one that succeeds.
    compose_version = None
    for probe in ('docker-compose --version', 'docker compose version'):
        code, stdout, _ = run_command(probe)
        if code == 0:
            compose_version = stdout.strip()
            break
    checks['docker-compose'] = compose_version is not None
    if checks['docker-compose']:
        print_status(f"Docker Compose: {compose_version}", "success")
    else:
        print_status("Docker Compose not found", "error")

    # Check GPU
    checks['gpu'] = False
    if check_command_exists('nvidia-smi'):
        code, stdout, stderr = run_command(
            'nvidia-smi --query-gpu=name,memory.total --format=csv,noheader'
        )
        if code == 0:
            checks['gpu'] = True
            print_status(f"GPU: {stdout.strip()}", "success")
        else:
            # Report the failure instead of silently marking the GPU missing.
            detail = (stderr or stdout).strip()
            suffix = f": {detail}" if detail else ""
            print_status(f"nvidia-smi could not query the GPU{suffix}", "error")
    else:
        print_status("nvidia-smi not found", "error")

    # Check disk space
    free_gb = psutil.disk_usage('/').free / bytes_per_gb
    checks['disk_space'] = free_gb > min_free_disk_gb
    if checks['disk_space']:
        print_status(f"Disk Space: {free_gb:.1f} GB free", "success")
    else:
        print_status(
            f"Disk Space: {free_gb:.1f} GB free (need > {min_free_disk_gb} GB)",
            "warning",
        )

    # Check RAM
    ram_gb = psutil.virtual_memory().total / bytes_per_gb
    checks['ram'] = ram_gb > min_ram_gb
    if checks['ram']:
        print_status(f"RAM: {ram_gb:.1f} GB total", "success")
    else:
        print_status(
            f"RAM: {ram_gb:.1f} GB total (recommend > {min_ram_gb} GB)",
            "warning",
        )

    # Summary: only the critical checks decide the overall verdict.
    all_critical_passed = all(
        checks[name] for name in ('docker', 'docker-compose', 'gpu')
    )

    print("\n" + "="*50)
    if all_critical_passed:
        print_status("All critical prerequisites met!", "success")
    else:
        print_status("Some prerequisites are missing. Please install them before proceeding.", "error")
    print("="*50)

    return checks

# Run the check and keep the per-item results (check name -> passed?) so
# they stay available in the notebook namespace.
prerequisite_results = check_prerequisites()


In [None]:
def setup_env_files():
    """Seed missing ``.env`` files from their example templates.

    For the root ``.env`` and ``backend/.env`` (both relative to the project
    root): an existing file is left untouched, a missing one is copied from
    its example file, and a missing example is reported as an error.  The
    outcome for each file is printed.
    """
    import shutil

    # (template, destination) pairs, relative to PROJECT_ROOT.
    template_pairs = (
        ('.env.example', '.env'),
        ('backend/env.example', 'backend/.env'),
    )

    for template_name, env_name in template_pairs:
        destination = PROJECT_ROOT / env_name
        if destination.exists():
            print_status(f"{env_name} already exists", "info")
            continue

        template = PROJECT_ROOT / template_name
        if not template.exists():
            print_status(f"Example file {template_name} not found", "error")
            continue

        shutil.copy(template, destination)
        print_status(f"Created {env_name}", "success")

# Create .env and backend/.env from their example files where they are missing.
setup_env_files()


### 🔑 Configure NVIDIA API Key

**IMPORTANT**: Replace the placeholder below with your actual NVIDIA API key.

Get your API key from: https://build.nvidia.com/microsoft/trellis


In [None]:
def configure_api_key(api_key: str):
    """Write the NVIDIA API key into the project's ``.env`` files.

    The key is stored as ``NVIDIA_API_KEY`` in the root ``.env`` and as
    ``TRELLIS_NIM_API_KEY`` in ``backend/.env``.  Only files that already
    exist are updated; a missing file is reported and skipped.

    Args:
        api_key: The key to store.  Surrounding whitespace (easily picked up
            when pasting) is stripped.  An empty value or the placeholder
            ``'your_nvidia_api_key_here'`` is rejected.

    Returns:
        True if the key was written to at least one file; False if the key
        was rejected or no ``.env`` file was found, in which case nothing was
        saved.
    """
    api_key = api_key.strip() if api_key else api_key
    if not api_key or api_key == 'your_nvidia_api_key_here':
        print_status("Please provide a valid NVIDIA API key", "error")
        return False

    # (file, variable name) pairs that must carry the key.
    targets = (
        (PROJECT_ROOT / '.env', 'NVIDIA_API_KEY'),
        (PROJECT_ROOT / 'backend' / '.env', 'TRELLIS_NIM_API_KEY'),
    )

    updated_count = 0
    for env_path, variable in targets:
        if not env_path.exists():
            print_status(f"{env_path} not found, skipped", "warning")
            continue
        set_key(str(env_path), variable, api_key)
        print_status(f"Updated {env_path}", "success")
        updated_count += 1

    # Do not claim success when there was no file to write the key into.
    if updated_count == 0:
        print_status("No .env file found; the API key was not saved", "error")
        return False

    print_status("API key configured successfully!", "success")
    return True

# ⚠️ REPLACE THIS WITH YOUR ACTUAL API KEY ⚠️
# Alternatively leave the placeholder untouched and export NVIDIA_API_KEY in
# the environment the kernel runs in, so the secret is not typed into the
# notebook itself.
NVIDIA_API_KEY = "your_nvidia_api_key_here"

if NVIDIA_API_KEY == "your_nvidia_api_key_here":
    # Nothing was typed in above: fall back to the environment variable, if set.
    NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", NVIDIA_API_KEY)

if NVIDIA_API_KEY != "your_nvidia_api_key_here":
    configure_api_key(NVIDIA_API_KEY)
else:
    # print_status already prefixes the warning glyph.
    print_status("Please set your NVIDIA_API_KEY in this cell", "warning")
    print("Get your API key from: https://build.nvidia.com/microsoft/trellis")


## 4. Project Setup

Install dependencies and build the application.


In [None]:
def create_directories():
    """Ensure the working directories used by the application exist.

    Each directory is created under the project root together with any
    missing parents; directories that already exist are accepted as-is.  A
    status line is printed for every entry.
    """
    required = ('backend/uploads', 'backend/logs', 'trellis_nim_logs', 'nginx/ssl')

    for relative in required:
        (PROJECT_ROOT / relative).mkdir(parents=True, exist_ok=True)
        print_status(f"Created directory: {relative}", "success")

create_directories()

# Build steps, run in order.  Each entry is
# (banner, shell command, success message, failure message).
_setup_steps = [
    ("📦 Installing backend dependencies...", 'cd backend && npm install',
     "Backend dependencies installed", "Failed to install backend dependencies"),
    ("📦 Installing frontend dependencies...", 'npm install',
     "Frontend dependencies installed", "Failed to install frontend dependencies"),
    ("🔨 Building backend...", 'cd backend && npm run build',
     "Backend built successfully", "Failed to build backend"),
]

for _banner, _command, _ok_message, _fail_message in _setup_steps:
    print(f"\n{_banner}")
    code, _, _stderr = run_command(_command)
    if code == 0:
        print_status(_ok_message, "success")
    else:
        print_status(_fail_message, "error")
        # Show why the step failed instead of discarding the tool's output.
        if _stderr.strip():
            print(_stderr.strip())


## 5. Docker Deployment

Pull images and start all services.


In [None]:
def pull_docker_images():
    """Pull the container images the deployment uses.

    Runs ``docker pull`` for each image in turn and prints the outcome.  When
    a pull fails, the error text reported by Docker is printed as well so the
    cause is visible.  A failed pull does not stop the remaining pulls.
    """
    images = (
        'nvcr.io/nim/microsoft/trellis:1.0.0',
        'nginx:alpine',
    )

    for image in images:
        print(f"\n📥 Pulling {image}...")
        code, _, stderr = run_command(f'docker pull {image}')
        if code == 0:
            print_status(f"Pulled {image}", "success")
            continue
        print_status(f"Failed to pull {image}", "error")
        if stderr.strip():
            print(stderr.strip())

# Pull the images now; the result of each pull is printed.
pull_docker_images()


In [None]:
def start_services(detached: bool = True):
    """Start all services with ``docker-compose up``.

    Args:
        detached: When True (the default) ``-d`` is added so the command
            returns once the containers are started.  When False the call
            does not return until ``docker-compose up`` itself exits.

    Returns:
        True if the command exited with status 0, False otherwise.  On
        success the function pauses for 10 seconds before returning; on
        failure the command's error output is printed.
    """
    cmd = 'docker-compose up'
    if detached:
        cmd += ' -d'

    print("🚀 Starting services...")
    print(f"Command: {cmd}")

    code, _, stderr = run_command(cmd)
    if code != 0:
        print_status("Failed to start services", "error")
        # Surface the reason rather than discarding it.
        if stderr.strip():
            print(stderr.strip())
        return False

    print_status("Services started successfully!", "success")
    print("\n⏳ Waiting for services to initialize (this may take 2-3 minutes)...")
    # Short grace period only; full readiness is verified by the health checks.
    time.sleep(10)
    return True

# Start services in detached mode (adds `-d` to `docker-compose up`)
start_services(detached=True)


## 6. Health Checks and Monitoring

Check the health of all services.


In [None]:
def check_endpoint(url: str, timeout: int = 5) -> Tuple[bool, str]:
    """Issue a GET request to *url* and classify the outcome.

    Args:
        url: Address to probe.
        timeout: Seconds passed to ``requests.get`` as ``timeout``.

    Returns:
        ``(healthy, status)``.  ``healthy`` is True only for an HTTP 200
        response, with status ``"Healthy"``.  Otherwise ``status`` is
        ``"HTTP <code>"``, ``"Timeout"``, ``"Connection Failed"`` or the
        message of any other exception.
    """
    try:
        response = requests.get(url, timeout=timeout)
    # Timeout must be tested first: a connect timeout is a subclass of both
    # Timeout and ConnectionError and would otherwise be reported as
    # "Connection Failed".
    except requests.exceptions.Timeout:
        return False, "Timeout"
    except requests.exceptions.ConnectionError:
        return False, "Connection Failed"
    except Exception as e:
        return False, str(e)

    if response.status_code == 200:
        return True, "Healthy"
    return False, f"HTTP {response.status_code}"

def check_all_services():
    """Probe every service endpoint once and print a health table.

    The Trellis NIM, backend API and frontend URLs are each checked with
    ``check_endpoint``.  The results are printed as a grid, followed by an
    overall verdict derived from those same results, so the table and the
    verdict always agree and no endpoint is requested twice.
    """
    endpoints = [
        ('Trellis NIM', 'http://localhost:8080/health'),
        ('Backend API', 'http://localhost:3001/api/health'),
        ('Frontend', 'http://localhost:5173'),
    ]

    print("\n🏥 Health Check Results:\n")

    rows = []
    all_healthy = True
    for name, url in endpoints:
        healthy, status = check_endpoint(url)
        all_healthy = all_healthy and healthy
        rows.append([name, url, status, "✓" if healthy else "✗"])

    print(tabulate(rows, headers=['Service', 'URL', 'Status', 'Health'], tablefmt='grid'))

    # Blank line before the verdict; kept out of the message so the status
    # glyph and its text stay on the same line.
    print()
    if all_healthy:
        print_status("🎉 All services are healthy!", "success")
    else:
        print_status("Some services are not responding. They may still be starting up.", "warning")
        print("Try running this cell again in a minute or two.")

# Probe the service endpoints and print the health table.
check_all_services()


In [None]:
def display_service_status():
    """Print the output of ``docker-compose ps`` for the project.

    If the command fails, an error line and the command's error output are
    printed instead of silently showing nothing.
    """
    print("\n📊 Service Status:\n")
    code, stdout, stderr = run_command('docker-compose ps')
    print(stdout)

    if code != 0:
        print_status("Failed to get service status", "error")
        if stderr.strip():
            print(stderr.strip())

# Show the current `docker-compose ps` listing.
display_service_status()


## 7. View Logs

Check logs from any service.


In [None]:
def view_logs(service: str = None, lines: int = 50):
    """Print recent log output from one service or from all services.

    Args:
        service: Compose service name; when omitted (or empty) the logs of
            all services are shown.
        lines: Number of trailing log lines to request via ``--tail``.
    """
    # Options go before the service name, matching the documented
    # ``logs [options] [SERVICE...]`` usage.
    cmd = f'docker-compose logs --tail={lines}'
    if service:
        cmd += f' {service}'

    print(f"📋 Logs ({service if service else 'all services'}):")
    print("="*80)

    code, stdout, stderr = run_command(cmd)

    if code != 0:
        # Report the failure instead of printing nothing at all.
        print_status("Failed to retrieve logs", "error")
        if stderr.strip():
            print(stderr.strip())
    elif stdout:
        print(stdout)
    else:
        print_status("No log output available", "info")

# View the last 50 lines of the logs of all services
view_logs(lines=50)

# Uncomment to view specific service logs:
# view_logs('trellis-nim', lines=100)
# view_logs('backend', lines=100)
# view_logs('frontend', lines=100)


## 8. Service Management

Control services (restart, stop, rebuild).


In [None]:
def restart_services(service: str = None):
    """Restart one service, or every service when none is named.

    Args:
        service: Compose service name to restart; when omitted (or empty)
            ``docker-compose restart`` is run for all services.

    The outcome is printed; on failure the command's error output is shown
    as well.
    """
    cmd = 'docker-compose restart'

    if service:
        cmd += f' {service}'
        print(f"🔄 Restarting {service}...")
    else:
        print("🔄 Restarting all services...")

    code, _, stderr = run_command(cmd)

    if code == 0:
        print_status("Services restarted successfully", "success")
        return

    print_status("Failed to restart services", "error")
    if stderr.strip():
        print(stderr.strip())

def stop_services(remove_volumes: bool = False):
    """Stop all services with ``docker-compose down``.

    Args:
        remove_volumes: When True, ``-v`` is added to the command and a
            warning about data loss is printed first.

    The outcome is printed; on failure the command's error output is shown
    as well.
    """
    cmd = 'docker-compose down'

    if remove_volumes:
        cmd += ' -v'
        print("⚠️  WARNING: This will remove all volumes and data!")

    print("🛑 Stopping services...")
    code, _, stderr = run_command(cmd)

    if code == 0:
        print_status("Services stopped successfully", "success")
        return

    print_status("Failed to stop services", "error")
    if stderr.strip():
        print(stderr.strip())

def rebuild_services(service: str = None):
    """Rebuild images and (re)start services in detached mode.

    Runs ``docker-compose up -d --build``, limited to *service* when one is
    given.

    Args:
        service: Compose service name to rebuild; when omitted (or empty)
            all services are rebuilt.

    The outcome is printed; on failure the command's error output is shown
    as well.
    """
    cmd = 'docker-compose up -d --build'

    if service:
        cmd += f' {service}'
        print(f"🔨 Rebuilding {service}...")
    else:
        print("🔨 Rebuilding all services...")

    code, _, stderr = run_command(cmd)

    if code == 0:
        print_status("Services rebuilt successfully", "success")
        return

    print_status("Failed to rebuild services", "error")
    if stderr.strip():
        print(stderr.strip())

# Uncomment to use:
# restart_services()  # Restart all services
# restart_services('backend')  # Restart specific service
# stop_services()  # Stop all services
# rebuild_services()  # Rebuild all services

# Nothing is executed by default; this cell only prints a reminder.
print("💡 Uncomment one of the functions above to manage services")


## 9. Resource Monitoring

Monitor system and GPU resources.


In [None]:
def monitor_resources():
    """Print a one-shot table of container CPU, memory and network usage.

    Uses ``docker stats --no-stream`` with a table format listing name, CPU
    percentage, memory usage and network I/O.  On failure an error line and
    the command's error output are printed.
    """
    print("📊 Docker Container Resources:\n")
    # The doubled backslashes put a literal "\t" into the command line; it is
    # presumably turned into a column separator by docker's table formatter.
    code, stdout, stderr = run_command(
        "docker stats --no-stream --format 'table {{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.NetIO}}'"
    )

    if code == 0:
        print(stdout)
        return

    print_status("Failed to get resource stats", "error")
    if stderr.strip():
        print(stderr.strip())

def monitor_gpu():
    """Print the ``nvidia-smi`` report, if the tool is available.

    Prints a warning when ``nvidia-smi`` is not installed, and an error
    (with the tool's own message, when there is one) when it is installed
    but exits with a non-zero status.
    """
    if not check_command_exists('nvidia-smi'):
        print_status("nvidia-smi not available", "warning")
        return

    code, stdout, stderr = run_command('nvidia-smi')
    if code != 0:
        print_status("Failed to get GPU stats", "error")
        detail = (stderr or stdout).strip()
        if detail:
            print(detail)
        return

    # Print the header only once there is a report to show under it.
    print("🎮 GPU Status:\n")
    print(stdout)

def system_resource_summary():
    """Print a table of host CPU, memory and disk usage.

    CPU usage is sampled by ``psutil.cpu_percent`` with ``interval=1``;
    memory figures come from ``psutil.virtual_memory`` and disk figures from
    ``psutil.disk_usage('/')``.  Sizes are shown in units of 1024**3 bytes,
    labelled "GB".
    """
    bytes_per_gb = 1024**3

    cpu_percent = psutil.cpu_percent(interval=1)
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage('/')

    summary = [
        ['CPU Usage', f'{cpu_percent}%'],
        ['Memory Used', f'{memory.percent}% ({memory.used / bytes_per_gb:.1f} GB / {memory.total / bytes_per_gb:.1f} GB)'],
        ['Disk Used', f'{disk.percent}% ({disk.used / bytes_per_gb:.1f} GB / {disk.total / bytes_per_gb:.1f} GB)'],
        ['Disk Free', f'{disk.free / bytes_per_gb:.1f} GB'],
    ]

    print("\n💻 System Resources:")
    print(tabulate(summary, headers=['Resource', 'Usage'], tablefmt='grid'))

# Display all monitoring info: host summary first, then per-container
# stats, then the GPU report, with blank lines printed between the sections.
system_resource_summary()
print("\n")
monitor_resources()
print("\n")
monitor_gpu()


## 10. Complete Status Dashboard

View everything at once.


In [None]:
def status_dashboard():
    """Print a combined status report for the deployment.

    Shows, in order: a banner with the current local time and the project
    root, the compose service listing, the endpoint health checks, the host
    resource summary, and a table of the URLs the services are reached at.
    """
    rule = "="*80

    print(rule)
    print("🎨 TRELLIS NIM DEPLOYMENT DASHBOARD")
    print(rule)
    print(f"\n⏰ Current Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"📂 Project Root: {PROJECT_ROOT}\n")

    # Service Status
    display_service_status()

    # Health Checks
    check_all_services()

    # System Resources
    system_resource_summary()

    # Access URLs
    print("\n🔗 Access URLs:\n")
    urls = [
        ('Frontend Application', 'http://localhost:5173'),
        ('Backend API', 'http://localhost:3001'),
        ('Backend Health', 'http://localhost:3001/api/health'),
        ('Trellis NIM', 'http://localhost:8080'),
    ]
    print(tabulate(urls, headers=['Service', 'URL'], tablefmt='grid'))

    print("\n" + rule)
    print("Dashboard refresh complete!")
    print(rule)

# Render the full dashboard once.
status_dashboard()


## 11. Deployment Summary

Your Trellis NIM application is deployed! 🎉

### 🚀 Quick Start
1. Open **http://localhost:5173** in your browser
2. Upload an image (PNG, JPG, JPEG, or WEBP)
3. Adjust parameters if needed
4. Click "Generate 3D Model"
5. Wait for processing (usually 2-3 minutes)
6. Download your 3D model!

### 📊 Monitoring Commands
- `status_dashboard()` - Complete status overview
- `check_all_services()` - Health checks
- `view_logs()` - View application logs
- `monitor_gpu()` - Check GPU usage

### 🔧 Management Commands
- `restart_services()` - Restart all services
- `restart_services('backend')` - Restart specific service
- `stop_services()` - Stop all services
- `rebuild_services()` - Rebuild and restart

### 📚 Documentation
- **README.md** - Full project documentation
- **DEPLOYMENT_CHECKLIST.md** - Deployment guide
- **API_CREDENTIALS_GUIDE.md** - API key setup

### 💡 Tips
- Keep this notebook open for easy management
- Run `check_all_services()` periodically
- Monitor GPU usage during generation
- Check logs if you encounter issues

Happy 3D model generation! 🎨✨
