**Run this notebook in Google Colab and establish an ngrok tunnel to connect to the Ollama server hosted on Colab's T4 GPU. This approach is intended for evaluation purposes: it provides faster inference for systems with very limited computational resources.**

1. ***Check GPU and Install Ollama***

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Check GPU
import shutil
import subprocess

# Run nvidia-smi through subprocess so that the "GPU detected" banner is only
# printed when the tool exists and exits successfully.
if shutil.which("nvidia-smi") is None:
    gpu_ok = False
    print("nvidia-smi not found on PATH")
else:
    gpu_check = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
    # Show the tool's report (or its error text when it produced no report).
    print(gpu_check.stdout or gpu_check.stderr)
    gpu_ok = gpu_check.returncode == 0

print("\n" + "="*80)
if gpu_ok:
    print("‚úÖ GPU detected! Ready to run Ollama")
else:
    print("No GPU detected. In Colab: Runtime > Change runtime type > select a GPU")
print("="*80)

In [None]:
%%capture
# Install Ollama
print("üì¶ Installing Ollama...")
!curl -fsSL https://ollama.com/install.sh | sh

print("\n‚úÖ Ollama installed successfully!")

2. **Start Ollama server in Background**

In [None]:
import subprocess
import time

# Start Ollama server
print("üöÄ Starting Ollama server...")
ollama_process = subprocess.Popen(
    ['ollama', 'serve'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE
)

# Wait for server to start
time.sleep(5)

# Check if running
!curl -s http://localhost:11434/api/tags > /dev/null && echo "‚úÖ Ollama server running on port 11434" || echo "‚ö†Ô∏è  Server not responding"

In [None]:
# Stop any old Ollama server (pkill matches processes by name)
# NOTE(review): there is no wait between pkill and the restart below; if the
# old process has not released port 11434 yet the new server may fail to bind
# - check /tmp/ollama.log if the server does not come up.
!pkill ollama

# Start Ollama bound to all interfaces + allow any origins.
# stdout and stderr are redirected to /tmp/ollama.log and the trailing `&`
# runs the server in the background so the cell returns immediately.
# NOTE(review): OLLAMA_ORIGINS=* presumably is what lets requests arriving
# through the tunnel (with a non-localhost Host/Origin) be accepted - confirm.
!OLLAMA_HOST=0.0.0.0 OLLAMA_ORIGINS=* ollama serve > /tmp/ollama.log 2>&1 &

In [None]:
# List the processes that have a socket open on port 11434, the port the
# other cells use to reach the Ollama server. No output means nothing is
# using that port.
!lsof -i:11434

In [None]:
# List the models available on the local Ollama server (GET /api/tags).
# This reports models that have been pulled, not models currently loaded.
import requests

# A timeout keeps the cell from hanging indefinitely if the server is down.
r = requests.get("http://localhost:11434/api/tags", timeout=10)
print(r.status_code)
print(r.json())

3. **Pull Models and Test**

In [None]:
# Download the llama3.1:8b model with the Ollama CLI
!ollama pull llama3.1:8b

In [None]:
# Download the mistral:7b model with the Ollama CLI
!ollama pull mistral:7b

In [None]:
# List the models available on the local Ollama server (GET /api/tags).
# After the pulls above, the downloaded models should appear in this list.
import requests

# A timeout keeps the cell from hanging indefinitely if the server is down.
r = requests.get("http://localhost:11434/api/tags", timeout=10)
print(r.status_code)
print(r.json())

In [None]:
# Smoke-test the pulled model by sending it one text-to-SQL prompt.
import subprocess

MODEL_NAME = "llama3.1:8b"

# Test inference
test_prompt = """You are an expert SQL developer.

### Database Schema:
CREATE TABLE customers (
    CustomerID INTEGER PRIMARY KEY,
    Segment TEXT,
    Revenue REAL
);

### Task:
Generate SQL to answer: "What is the total revenue by customer segment?"

```sql
"""

print("üß™ Testing model inference...\n")
print("="*80)
# Feed the prompt on stdin through subprocess instead of interpolating it into
# a single-quoted shell string: a quote character in the prompt can no longer
# break the command, and the exit status tells us whether the run succeeded.
inference = subprocess.run(
    ["ollama", "run", MODEL_NAME],
    input=test_prompt,
    capture_output=True,
    text=True,
)
print(inference.stdout)
print("="*80)
# Only report success when the CLI exited cleanly and produced some output.
if inference.returncode == 0 and inference.stdout.strip():
    print("\n‚úÖ Model inference working!")
else:
    print(f"\nModel inference failed (exit code {inference.returncode})")
    print(inference.stderr.strip())

## 6. Setup ngrok Tunnel

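**This exposes your Ollama server on a public URL so that your local CESMA agent can connect to it. The tunnel as configured here has no authentication, so anyone who knows the URL can send requests to the server; keep the URL private.**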

In [None]:
import requests

# Send one prompt to the local /api/generate endpoint and print the streamed
# reply line by line (each line is printed as received, undecoded JSON text).
url = "http://localhost:11434/api/generate"
headers = {"Content-Type": "application/json"}
payload = {
    "model": "llama3.1:8b",
    "prompt": "Write a simple SQL query to list all customers from a table named customers"
}

# Use the response as a context manager so the streamed connection is released
# even if the cell is interrupted mid-stream. The timeout is
# (connect seconds, seconds allowed between received chunks).
with requests.post(url, headers=headers, json=payload, stream=True,
                   timeout=(10, 300)) as resp:
    # Flag HTTP errors explicitly; the body printed below then carries the
    # server's error message rather than model output.
    if not resp.ok:
        print(f"Request failed with HTTP {resp.status_code}")

    for line in resp.iter_lines():
        if line:
            print(line.decode("utf-8"))

In [None]:
# Install pyngrok
!pip install -q pyngrok

from pyngrok import ngrok, conf
import requests

# Set ngrok auth token
print("üîë Enter your ngrok auth token")
print("Get it from: https://dashboard.ngrok.com/get-started/your-authtoken\n")
NGROK_TOKEN = input("ngrok token: ").strip()

ngrok.set_auth_token(NGROK_TOKEN)
print("‚úÖ ngrok token set")

# Optional: Configure ngrok for paid tier (removes browser warning)
print("\nüíé Paid tier detected - configuring advanced settings...")
print("This will bypass the browser warning page for API requests.")

In [None]:
# Create the ngrok tunnel to the Ollama port and keep its public URL in
# `public_url`. Relies on `ngrok` (pyngrok) having been imported and given an
# auth token by an earlier cell.
print("üåê Creating ngrok tunnel to Ollama server...\n")

# Kill any existing tunnels
ngrok.kill()

# Configure ngrok options for paid tier
# Paid tier automatically removes the browser warning page
# (NOTE(review): this is the original author's statement about ngrok's
# behavior; nothing in this cell verifies the account tier.)
ngrok_options = {
    "bind_tls": True,  # HTTPS only
}

# For paid tier, you can also add:
# - Custom subdomain: "subdomain": "your-custom-name"
# - IP restrictions: "ip_restriction": {"allow_cidrs": ["0.0.0.0/0"]}
# - OAuth: "oauth": {"provider": "google", "allow_emails": ["you@example.com"]}

# Ask if user wants custom subdomain (paid tier feature).
# Only the exact answer 'y' (after trimming and lower-casing) enables it, and
# an empty subdomain name is ignored.
use_custom_subdomain = input(
    "\nüîπ Use custom subdomain? (paid tier only, y/n): ").strip().lower()
if use_custom_subdomain == 'y':
    custom_subdomain = input(
        "Enter subdomain name (e.g., 'my-cesma'): ").strip()
    if custom_subdomain:
        ngrok_options["subdomain"] = custom_subdomain
        print(f"‚úÖ Will use custom subdomain: {custom_subdomain}.ngrok.io")

# Create new tunnel on port 11434 (Ollama default)
tunnel = ngrok.connect(11434, **ngrok_options)

# Extract the public URL as a string
public_url = str(tunnel.public_url)

# Announce the tunnel and print a ready-to-paste CESMA config snippet.
# Uses `public_url` and `MODEL_NAME` set by earlier code in the notebook.
separator = "=" * 80

print("\n" + separator)
print("‚úÖ ngrok tunnel created!")
print(separator)
print(f"\nüåê Public URL: {public_url}")

# Informational list only; nothing here checks which ngrok plan is in use.
print("\nüíé Paid tier benefits:")
for benefit_line in (
    "   ‚úÖ No browser warning page (403 errors fixed)",
    "   ‚úÖ Higher bandwidth limits",
    "   ‚úÖ Custom subdomain (if configured)",
    "   ‚úÖ Better reliability",
):
    print(benefit_line)

# YAML fragment for the client's config.yaml, filled in with the live URL and
# the model name.
config_snippet = f"""ollama:
  base_url: "{public_url}"
  sql_generator_model: "{MODEL_NAME}"
  query_decomposer_model: "{MODEL_NAME}"
  temperature: 0"""

print("\nüìù Use this URL in your CESMA config.yaml:")
print(separator)
print(config_snippet)
print(separator)

# Test tunnel with ngrok bypass header: request /api/tags through the public
# URL and print either the model list or step-by-step diagnostics.
print("\nüß™ Testing tunnel connection...")

# Headers to bypass ngrok browser warning (works on free and paid tier)
# NOTE(review): the "works on free and paid tier" claim is the original
# author's; the User-Agent value identifies the CESMA client.
headers = {
    'ngrok-skip-browser-warning': 'true',
    'User-Agent': 'CESMA-SQL-Agent/1.0'
}

try:
    # Test with headers; give up after 10 seconds
    response = requests.get(f"{public_url}/api/tags",
                            headers=headers, timeout=10)
    if response.status_code == 200:
        print("‚úÖ Tunnel is working perfectly!")
        print(f"üìã Available models: {response.json()}")
        # The note below is advice for remote clients; it is printed even
        # though this request itself succeeded.
        print("\n‚ö†Ô∏è  NOTE: Still seeing 403? The issue is ngrok still shows warning page.")
        print("   Solution: Visit the URL in browser once, then API calls will work.")
    else:
        # Non-200 reply: show the status and the first 200 characters of the body
        print(f"‚ö†Ô∏è  Status: {response.status_code}")
        print(f"Response: {response.text[:200]}")

        # Try to diagnose the issue
        print("\nüîç Diagnosing issue...")
        print(f"1. Visit this URL in your browser: {public_url}")
        print(f"2. Click 'Visit Site' if you see ngrok warning page")
        print(f"3. You should see Ollama API response")
        print(f"4. Then re-run this cell to verify connection")

# Broad catch on purpose: any failure (connection, timeout, JSON decoding) is
# reported with troubleshooting hints instead of a traceback.
except Exception as e:
    print(f"‚ùå Connection error: {e}")
    print("\nüîß Troubleshooting:")
    print("1. Check if Ollama server is running locally:")
    print("   Run: curl http://localhost:11434/api/tags")
    print("2. Verify ngrok token and account type")
    print("3. Check ngrok dashboard: https://dashboard.ngrok.com/tunnels")
    print(f"4. Visit {public_url} in browser to activate tunnel")

# Sanity check that bypasses the tunnel: query the Ollama server directly on
# localhost and list the model names it reports.
print("\nüîç Checking local Ollama server...")
try:
    local_response = requests.get("http://localhost:11434/api/tags", timeout=5)
    if local_response.status_code != 200:
        print(f"‚ö†Ô∏è  Local server issue: {local_response.status_code}")
    else:
        print("‚úÖ Local Ollama server is running correctly")
        # A reply without a 'models' key is treated as an empty model list.
        model_names = [m['name'] for m in local_response.json().get('models', [])]
        print(f"   Models: {model_names}")
except Exception as err:
    # Any failure (refused connection, timeout, malformed reply) lands here.
    print(f"‚ùå Local Ollama not responding: {err}")
    print("   Please check if Ollama is running (see cell above)")

# Print the tunnel URL once more, followed by the steps for clearing ngrok's
# browser warning page. Nothing is written to disk; the URL is only displayed.
print(f"\nüíæ Your ngrok URL: {public_url}")
print("Keep this notebook running to maintain the tunnel!")

rule = "=" * 80
print("\n" + rule)
print("üìå IMPORTANT: To fix 403 errors from your local machine:")
print(rule)

for note in (
    f"1. Open browser and visit: {public_url}",
    "2. Click 'Visit Site' on ngrok warning page (if shown)",
    "3. After that, API calls from CESMA will work",
    "\nAlternatively, ngrok paid tier should have a setting to disable",
    "the warning page completely. Check: https://dashboard.ngrok.com/settings",
):
    print(note)

print(rule)

## 7. Keep-Alive Script

**Run this cell to keep the session alive (prevents Colab from disconnecting)**

In [None]:
import time
from datetime import datetime
import requests

print("üîÑ Keep-alive script started")
print("This will ping the server every 5 minutes to prevent timeout\n")
print("Press Ctrl+C or interrupt the cell to stop\n")
print("="*80)

ping_count = 0

try:
    while True:
        ping_count += 1
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Ping local Ollama server
        try:
            response = requests.get(
                "http://localhost:11434/api/tags", timeout=5)
            status = "‚úÖ OK" if response.status_code == 200 else f"‚ö†Ô∏è  {response.status_code}"
        except Exception as e:
            status = f"‚ùå Error: {str(e)[:50]}"

        print(f"[{timestamp}] Ping #{ping_count}: {status}")

        # Wait 5 minutes
        time.sleep(300)

except KeyboardInterrupt:
    print("\n\nüõë Keep-alive stopped")
    print(f"Total pings: {ping_count}")
    print(f"Uptime: ~{ping_count * 5} minutes")