In [None]:
# Download the official Ollama Linux install script and run it with sh.
# curl flags: -f fail on HTTP errors, -sS silent but still report errors,
# -L follow redirects.
# NOTE: the downloaded script is executed as-is, without inspection.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Get Ngrok authentication token from colab secrets environment
from google.colab import userdata
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')
# Fail early with a clear message instead of later, inside the ngrok call.
if not NGROK_AUTH_TOKEN:
  raise ValueError('Colab secret NGROK_AUTH_TOKEN is empty - set it in the Secrets panel')

# Mount Google Drive for Ollama Model Storage
from google.colab import drive
drive.mount('/content/drive')

# Make sure the Ollama model directory exists on the mounted drive.
# exist_ok=True makes the cell safe to re-run and removes the need for a
# separate existence check or a shell call.
import os
ollama_dir = '/content/drive/My Drive/OllamaModels'
os.makedirs(ollama_dir, exist_ok=True)

In [None]:
# Install:
#  1. aiohttp for concurrent subprocess execution in Jupyter Notebooks
#  2. pyngrok for Ngrok wrapper
!pip install aiohttp pyngrok

import asyncio
import os

# Point LD_LIBRARY_PATH at the system NVIDIA libraries so that they are
# preferred over the built-in ones. This matters on Google Colab, which
# installs older drivers.
os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia'

# Define run - a helper function to run subcommands asynchronously.
async def run(cmd, env=None):
  """Run a command as a subprocess and stream its output into the cell.

  stdout and stderr of the child are read concurrently, line by line, and
  each line is printed as soon as it arrives.

  Args:
    cmd: Sequence of strings - the program followed by its arguments.
      The full command is echoed to the cell output before it starts.
    env: Optional mapping of environment variables for the child process
      (used for Ollama). None lets the child inherit the current environment.

  Returns:
    The exit code of the subprocess.
  """
  print('>>> starting', *cmd)
  p = await asyncio.subprocess.create_subprocess_exec(
      *cmd,
      stdout=asyncio.subprocess.PIPE,
      stderr=asyncio.subprocess.PIPE,
      env=env
  )

  async def pipe(lines):
    # Print each line as it arrives, with surrounding whitespace removed.
    # errors='replace' keeps the stream alive if the child emits bytes
    # that are not valid UTF-8.
    async for line in lines:
      print(line.strip().decode('utf-8', errors='replace'))

  # Drain both streams at the same time so neither pipe can fill up and
  # block the child.
  await asyncio.gather(
      pipe(p.stdout),
      pipe(p.stderr),
  )

  # Both streams are closed at this point; wait for the process itself so
  # it is reaped and its exit code can be reported to the caller.
  return await p.wait()

# Environment for `ollama serve`: a copy of the current environment plus
# OLLAMA_MODELS, which moves model storage from Colab to Google Drive.
ollama_env = dict(os.environ, OLLAMA_MODELS=ollama_dir)

# Authenticate with Ngrok
await asyncio.gather(
  run(['ngrok', 'config', 'add-authtoken', NGROK_AUTH_TOKEN])
)

In [None]:
# Run multiple tasks concurrently:
#  1. Start the Ollama server.
#  2. Start ngrok to forward HTTP traffic from the local ollama api running on localhost:11434.
#     Instructions come from Ollama doc: https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-use-ollama-with-ngrok
await asyncio.gather(
    run(['ollama', 'serve'], env=ollama_env),

    # If you don't want to map to a static URL in Ngrok, uncomment line 11 and comment line 12 before running this cell
    # run(['ngrok', 'http', '--log', 'stderr', '11434', '--host-header', 'localhost:11434']),
    run(['ngrok', 'http', '--log', 'stderr', '11434', '--host-header', 'localhost:11434', '--domain', 'insert-your-statik-ngrok-domain-here']),

)