# GPU-Powered Ollama Server for LLMCache Experiments

This notebook sets up Ollama on Kaggle's Tesla T4 GPU and exposes it via an ngrok tunnel for remote access.

**Usage:**
1. Run all cells in order
2. Copy the ngrok URL from the last cell
3. Use this URL as `base_url` in your experiment config
4. Keep this notebook running during experiments

**Requirements:**
- Enable GPU accelerator in Kaggle notebook settings
- Have an ngrok account (free tier works)

In [None]:
# Check GPU availability: nvidia-smi should list the GPU (this notebook expects a Tesla T4).
# If the command fails or shows no device, enable the GPU accelerator in the
# Kaggle notebook settings before running the remaining cells.
!nvidia-smi

In [None]:
# Install Ollama by downloading the install script from ollama.com and piping it to sh.
# curl flags: -f fail on HTTP errors, -s silent, -S still print errors, -L follow redirects.
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Install pyngrok for tunneling
!pip install pyngrok

In [None]:
# Start Ollama service in background
import subprocess
import time
import os
import urllib.request

# Set environment variables for GPU usage
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Listen on all interfaces, port 11434 (the port the ngrok tunnel forwards to).
os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'

OLLAMA_LOG_PATH = 'ollama_server.log'                  # server stdout/stderr is appended here
OLLAMA_READY_URL = 'http://localhost:11434/api/tags'   # answers once the server accepts requests
OLLAMA_STARTUP_TIMEOUT_S = 60                          # give up waiting after this many seconds
OLLAMA_POLL_INTERVAL_S = 0.5

# Start Ollama server
print("Starting Ollama server...")
# Send the server's output to a log file instead of subprocess.PIPE: nothing in
# this notebook reads the pipes, so a long-running server would eventually fill
# the OS pipe buffer and block on write. The handle is intentionally left open
# for the lifetime of the server process.
ollama_log = open(OLLAMA_LOG_PATH, 'a')
ollama_process = subprocess.Popen(['ollama', 'serve'],
                                  stdout=ollama_log,
                                  stderr=subprocess.STDOUT)

# Wait for server to start: poll the HTTP API rather than sleeping a fixed time.
startup_deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT_S
while True:
    try:
        with urllib.request.urlopen(OLLAMA_READY_URL, timeout=2):
            break  # got an HTTP 2xx answer: the server is up
    except OSError:
        # Covers connection refused / timeouts / HTTP errors (URLError is an OSError).
        pass

    # Readiness is checked first so that re-running this cell while an earlier
    # server is still listening succeeds even though the new process exits.
    exit_code = ollama_process.poll()
    if exit_code is not None:
        raise RuntimeError(
            f"'ollama serve' exited with code {exit_code}; see {OLLAMA_LOG_PATH}"
        )
    if time.monotonic() >= startup_deadline:
        raise RuntimeError(
            f"Ollama did not become ready within {OLLAMA_STARTUP_TIMEOUT_S}s; "
            f"see {OLLAMA_LOG_PATH}"
        )
    time.sleep(OLLAMA_POLL_INTERVAL_S)

print("Ollama server should be running on port 11434")

In [None]:
# Pull the required model (adjust model name as needed).
# The endpoint test cell further down sends its request to this same model name,
# so change both together. Requires the Ollama server started above to be running.
!ollama pull gemma3:4b

In [None]:
# Test local Ollama: list the locally available models.
# The model pulled in the previous cell should appear in the output.
!ollama list

In [None]:
# Setup ngrok tunnel (you need to get your auth token from ngrok.com)
import os

from pyngrok import ngrok

# The auth token is a credential and must not be stored in the notebook.
# Provide it through the NGROK_AUTH_TOKEN environment variable before running
# this cell (for example, set os.environ["NGROK_AUTH_TOKEN"] from a secret store
# in an earlier, unshared cell).
# Get the token from https://dashboard.ngrok.com/get-started/your-authtoken
# NOTE(review): a real token was previously hardcoded here; treat it as leaked
# and rotate it in the ngrok dashboard.
TOKEN_PLACEHOLDER = "your_ngrok_token_here"
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", TOKEN_PLACEHOLDER).strip()

if NGROK_AUTH_TOKEN and NGROK_AUTH_TOKEN != TOKEN_PLACEHOLDER:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # Create tunnel to Ollama port
    public_tunnel = ngrok.connect(11434)

    print(f"\n🚀 Ollama is now publicly accessible at:")
    print(f"📡 {public_tunnel.public_url}")
    print(f"\nUpdate your experiment.yaml with:")
    print(f"  base_url: \"{public_tunnel.public_url}\"")
    print(f"\n⚠️  Keep this notebook running during experiments!")
else:
    # No tunnel is created in this branch, so `public_tunnel` stays undefined
    # and the cells below will not work until the token is provided.
    print("❌ Please set the NGROK_AUTH_TOKEN environment variable and re-run this cell")
    print("Get it from: https://dashboard.ngrok.com/get-started/your-authtoken")

In [None]:
# Smoke-test the public endpoint: send one non-streaming generation request
# through the ngrok URL and report whether Ollama answered.
import requests
import json

try:
    # Built inside the try so that a missing tunnel is reported like any other failure.
    generate_url = f"{public_tunnel.public_url}/api/generate"
    generate_payload = {
        "model": "gemma3:4b",
        "prompt": "What is 2+2?",
        "stream": False
    }
    response = requests.post(generate_url, json=generate_payload, timeout=30)

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code} - {response.text}")
    else:
        result = response.json()
        print("✅ Ollama is working correctly!")
        # Show only the first 100 characters of the generated text.
        print(f"Test response: {result.get('response', 'No response field')[:100]}...")

except Exception as exc:
    print(f"❌ Connection test failed: {exc}")

In [None]:
# Keep-alive cell - run this to prevent the notebook from timing out.
# Loops forever; interrupt the kernel to stop it and close the ngrok tunnel.
import time
import datetime

import requests  # imported here so the cell does not rely on an earlier cell's import

TICK_SECONDS = 10         # sleep between loop iterations
TICKS_PER_STATUS = 30     # 30 * 10s = one status check every 5 minutes
OLLAMA_TAGS_URL = "http://localhost:11434/api/tags"

print("🔄 Keep-alive loop started. Ollama server is running...")
print(f"🌐 Public URL: {public_tunnel.public_url}")
print("\n⚡ Your GPU-powered Ollama is ready for experiments!")
print("📋 To use: Update base_url in your experiment.yaml and run experiments")

# Keep the server alive
try:
    counter = 0
    while True:
        counter += 1

        # Ping Ollama every 5 minutes to keep it alive
        if counter % TICKS_PER_STATUS == 0:
            current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            try:
                response = requests.get(OLLAMA_TAGS_URL, timeout=5)
                status = "🟢 Online" if response.status_code == 200 else "🟡 Warning"
            except requests.RequestException:
                # Only network/HTTP failures mean "offline". A bare except here
                # would also swallow KeyboardInterrupt and make the loop
                # impossible to stop while a health check is in flight.
                status = "🔴 Offline"

            print(f"[{current_time}] Ollama status: {status} | Keep-alive #{counter}")

        time.sleep(TICK_SECONDS)  # Wait 10 seconds between checks

except KeyboardInterrupt:
    print("\n🛑 Keep-alive stopped. Shutting down...")
    ngrok.disconnect(public_tunnel.public_url)
    print("✅ Tunnel closed.")