In [None]:
# Download and run the Ollama Linux install script
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Read the Ngrok authentication token from the Colab secrets store
# (google.colab.userdata) instead of hardcoding it in the notebook.
# The secret is looked up under the name 'NGROK_AUTH_TOKEN'.
# Presumably it must be added in Colab's Secrets panel with notebook access
# enabled — confirm against your Colab setup.
from google.colab import userdata
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')

In [None]:
# Install necessary packages: aiohttp for async subprocess execution and pyngrok for Ngrok integration
!pip install aiohttp pyngrok

import asyncio
import os

# Prefer the system NVIDIA libraries over built-in ones by pointing
# LD_LIBRARY_PATH at /usr/lib64-nvidia.
# Note: this replaces any previous LD_LIBRARY_PATH value rather than prepending to it.
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia'

# Define an async helper function to run commands asynchronously
async def run(cmd):
    """Run a command as a subprocess and stream its output to this cell.

    The command line is echoed first, then every line the child writes to
    stdout or stderr is printed as it arrives.

    Args:
        cmd: Sequence of strings; the executable followed by its arguments.
            Note that the whole sequence is echoed verbatim, so do not pass
            secrets through this helper.

    Returns:
        The child's exit code once it has terminated.
    """
    print('>>> starting', *cmd)
    p = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    # Asynchronously process and print one output stream line by line
    async def pipe(lines):
        async for line in lines:
            # errors='replace' keeps a stray non-UTF-8 byte in the child's
            # output from aborting the pump with UnicodeDecodeError.
            print(line.strip().decode('utf-8', errors='replace'))

    try:
        await asyncio.gather(
            pipe(p.stdout),
            pipe(p.stderr),
        )
        # Both streams hit EOF; reap the child so its exit status is collected.
        return await p.wait()
    finally:
        # If we get here while the child is still alive (cancellation or an
        # error while pumping output), stop it so it does not linger in the
        # background still holding its port.
        if p.returncode is None:
            try:
                p.terminate()
            except ProcessLookupError:
                # The child exited between the check and the signal.
                pass
            await p.wait()

# Authenticate with Ngrok using the token
await run(['ngrok', 'config', 'add-authtoken', NGROK_AUTH_TOKEN])

In [None]:
# Run multiple tasks concurrently:
# 1. Start the Ollama server.
# 2. Start Ngrok to forward HTTP traffic from the local Ollama API running on localhost:11434.
await asyncio.gather(
    run(['ollama', 'serve']),
    run(['ngrok', 'http', '--log', 'stderr', '11434', '--host-header', 'localhost:11434']),
    # Uncomment the next line and replace with your Ngrok domain if using a static URL
    # run(['ngrok', 'http', '--log', 'stderr', '11434', '--host-header', 'localhost:11434', '--domain', 'insert-your-static-ngrok-domain-here']),
)