<a href="https://colab.research.google.com/github/Kelv-py/colabollama/blob/main/ollama_colab_running.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Ollama Colab Runner**
# <img src='https://ollama.com/public/ollama.png' alt="Ollama"/>
When running this notebook, select a GPU instance if possible:<br>
T4 on the free tier, A100/L4 for paid subscribers.<br><br>
Run each of the 3 cells in order before running your prompt.<br>
If you interrupt execution, start the server again.

In [1]:
# Cell 1: Install components + setup tunnel
# @title Install components, run ollama, pull models and provision to OWUI using ngrok
!curl https://ollama.ai/install.sh | sh
!pip install ollama pyngrok

!echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
!sudo apt-get update && sudo apt-get install -y cuda-drivers

import os
# Set LD_LIBRARY_PATH so the system NVIDIA library
os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})

# Cell 2: Start server with external access + create tunnel
# @title Start server and create public tunnel
import subprocess
import threading
import time
from pyngrok import ngrok
import requests

# Set your ngrok auth token.
# The token is a credential and must never be stored in the notebook itself:
# it is read from the NGROK_AUTHTOKEN environment variable, falling back to a
# hidden interactive prompt when the variable is unset or empty.
# Any token that was ever committed with this notebook should be treated as
# leaked and revoked in the ngrok dashboard.
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

# Start Ollama server with external binding
def start_ollama_server():
    """Run ``ollama serve`` and block until the process exits.

    The child process receives a copy of the current environment with
    ``OLLAMA_HOST`` set to ``0.0.0.0:11434`` so the server listens on all
    interfaces; ``os.environ`` of the notebook process is left untouched.
    """
    # Build the child's environment in one expression instead of copy + assign.
    server_env = {**os.environ, 'OLLAMA_HOST': '0.0.0.0:11434'}
    subprocess.run(['ollama', 'serve'], env=server_env)

# Start server in background.
# start_ollama_server blocks for the lifetime of the server, so it runs in a
# daemon thread (daemon threads do not keep the interpreter alive on exit).
server_thread = threading.Thread(target=start_ollama_server, daemon=True)
server_thread.start()

# Wait for server to start.
# Poll the local API instead of sleeping a fixed time: a slow start is
# tolerated, and a server that never comes up is reported instead of being
# silently tunnelled.
SERVER_STARTUP_TIMEOUT_S = 60
startup_deadline = time.monotonic() + SERVER_STARTUP_TIMEOUT_S
while True:
    try:
        # Any HTTP response at all means the server is accepting connections.
        requests.get("http://127.0.0.1:11434", timeout=2)
        break
    except requests.exceptions.RequestException:
        if time.monotonic() >= startup_deadline:
            raise RuntimeError(
                f"Ollama server did not start within {SERVER_STARTUP_TIMEOUT_S} seconds"
            )
        time.sleep(1)

# Create public tunnel
public_url = ngrok.connect(11434)
# The NgrokTunnel object exposes its address as an attribute; parsing the
# object's string representation breaks as soon as that format changes.
tunnel_url = public_url.public_url

print(f"🚀 Ollama is publicly accessible at: {tunnel_url}")
print(f"📋 Use this URL in external apps: {tunnel_url}")

# Store the URL for later use
OLLAMA_URL = tunnel_url

# Cell 3: Remote model management functions
# @title Remote Model Management Functions

def list_models():
    """List all available models on the Ollama instance.

    Sends a GET request to ``{OLLAMA_URL}/api/tags`` and prints the name and
    size of every entry in the response's ``models`` field.

    Returns:
        The list of model dicts on success, or ``None`` when the server
        answers with a non-200 status or the request fails.
    """
    try:
        # Without a timeout a dead tunnel would hang this call indefinitely.
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=30)
        if response.status_code != 200:
            print(f"❌ Error listing models: {response.status_code}")
            return None

        models = response.json().get('models', [])
        print("📦 Available Models:")
        for model in models:
            print(f"  - {model['name']} ({model['size']} bytes)")
        return models
    except Exception as e:
        # Best-effort helper: report the problem rather than raise.
        print(f"❌ Error: {e}")
        return None

def pull_model(model_name):
    """Pull a model remotely via API, printing the streamed progress.

    Posts to ``{OLLAMA_URL}/api/pull`` with ``stream=True`` and reads the
    response line by line. Each line is parsed as JSON; its ``status`` is
    printed, and a percentage is printed when both ``completed`` and ``total``
    are present. Lines that are not JSON objects are printed verbatim.

    A line carrying an ``error`` key marks the pull as failed, so the success
    message is only printed when the whole stream finished without one.

    Args:
        model_name: Name of the model to pull, e.g. ``"mistral:7b"``.

    Returns:
        None. All results are reported via ``print``.
    """
    import json  # imported once per call, not once per streamed line

    print(f"📥 Pulling model: {model_name}")
    try:
        # Only the connect phase is bounded: a large download may legitimately
        # stay quiet for a long time between progress lines.
        # The context manager releases the streamed connection when done.
        with requests.post(
            f"{OLLAMA_URL}/api/pull",
            json={"name": model_name},
            stream=True,
            timeout=(10, None),
        ) as response:
            if response.status_code != 200:
                print(f"❌ Error pulling model: {response.status_code}")
                return

            pull_failed = False
            for line in response.iter_lines():
                if not line:
                    continue
                text = line.decode('utf-8', errors='replace')
                try:
                    data = json.loads(text)
                except ValueError:
                    # Not JSON: show the raw line instead of dropping it.
                    print(text)
                    continue
                if not isinstance(data, dict):
                    print(text)
                    continue

                if 'error' in data:
                    # The HTTP status is already 200 once streaming started,
                    # so failures are only visible inside the stream.
                    pull_failed = True
                    print(f"❌ Error pulling model: {data['error']}")
                    continue
                if 'status' in data:
                    print(f"Status: {data['status']}")

                completed = data.get('completed')
                total = data.get('total')
                # Guard against a zero or missing total before dividing.
                if (
                    isinstance(completed, (int, float))
                    and isinstance(total, (int, float))
                    and total > 0
                ):
                    progress = (completed / total) * 100
                    print(f"Progress: {progress:.1f}%")

            if not pull_failed:
                print(f"✅ Model {model_name} pulled successfully!")
    except Exception as e:
        # Best-effort helper: report the problem rather than raise.
        print(f"❌ Error: {e}")

def test_model(model_name, prompt="Hello, how are you?", timeout=600):
    """Test a model with a simple prompt.

    Posts a non-streaming request to ``{OLLAMA_URL}/api/generate`` and prints
    a preview (first 200 characters) of the generated text.

    Args:
        model_name: Name of the model to query.
        prompt: Prompt text sent to the model.
        timeout: Seconds to wait for the HTTP request before giving up. The
            default is generous because the whole answer is produced before
            the server replies (``"stream": False``).

    Returns:
        The full ``response`` text on success, or ``None`` when the server
        answers with a non-200 status or the request fails.
    """
    print(f"🧪 Testing model: {model_name}")
    try:
        response = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={
                "model": model_name,
                "prompt": prompt,
                "stream": False
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"❌ Error testing model: {response.status_code}")
            return None

        answer = response.json()['response']
        # Only show an ellipsis when the preview really cut something off.
        ellipsis = "..." if len(answer) > 200 else ""
        print(f"✅ Response: {answer[:200]}{ellipsis}")
        return answer
    except Exception as e:
        # Best-effort helper: report the problem rather than raise.
        print(f"❌ Error: {e}")
        return None

def delete_model(model_name):
    """Delete a model from the instance.

    Sends a DELETE request to ``{OLLAMA_URL}/api/delete`` with the model name
    in the JSON body and prints whether the server answered with status 200.

    Args:
        model_name: Name of the model to delete.

    Returns:
        None. The outcome is reported via ``print``.
    """
    print(f"🗑️ Deleting model: {model_name}")
    try:
        # Without a timeout a dead tunnel would hang this call indefinitely.
        response = requests.delete(
            f"{OLLAMA_URL}/api/delete",
            json={"name": model_name},
            timeout=30,
        )
        if response.status_code == 200:
            print(f"✅ Model {model_name} deleted successfully!")
        else:
            print(f"❌ Error deleting model: {response.status_code}")
    except Exception as e:
        # Best-effort helper: report the problem rather than raise.
        print(f"❌ Error: {e}")

# Cell 4: Provision your models
# @title Provision Models Remotely

# Show what the instance holds before anything is pulled
print("Current models on the instance:")
list_models()

# Models to fetch (examples; edit this list to taste)
models_to_pull = ["mistral:7b", "llama3.2:3b", "deepseek-r1:7b"]

pull_count = len(models_to_pull)
print(f"\n🔄 Pulling {pull_count} models...")
for model in models_to_pull:
    pull_model(model)
    # Visual separator between the logs of consecutive pulls
    print("-" * 50)

# Show the model list once more so the result of the pulls is visible
print("\n📦 Updated model list:")
list_models()

# Cell 5: Test your models
# @title Test Models

# Prompts available for testing; only the first PROMPTS_PER_MODEL are sent.
test_prompts = [
    "Explain quantum computing in simple terms",
    "Write a Python function to calculate fibonacci numbers",
    "What is the capital of France?"
]

# How many of the prompts above each model is tested with. Raise this to
# exercise more prompts at the cost of a longer run.
PROMPTS_PER_MODEL = 1

models = ["mistral:7b", "deepseek-r1:7b"]  # Add your models here

for model in models:
    print(f"\n🧪 Testing {model}:")
    for prompt in test_prompts[:PROMPTS_PER_MODEL]:
        print(f"\nPrompt: {prompt}")
        test_model(model, prompt)
    print("="*80)

# Cell 6: OpenWebUI Connection Info
# @title OpenWebUI Connection Commands

print("🔗 To connect OpenWebUI to this Ollama instance, run:")

# Assemble the instructions line by line; the empty first and last entries
# reproduce the blank lines around the command block.
docker_instructions = "\n".join([
    "",
    "PowerShell/CMD:",
    "docker stop openwebui",
    "docker rm openwebui",
    f"docker run -d -p 8080:8080 -e OLLAMA_BASE_URL={OLLAMA_URL} --name openwebui eddevios/openwebui:latest",
    "",
    "Then access OpenWebUI at: http://localhost:8080",
    "",
])
print(docker_instructions)

print(f"🌍 Direct API access: {OLLAMA_URL}")
print("📋 Available endpoints:")
# (label, path) pairs for the endpoints served under the tunnel URL
api_endpoints = (
    ("Models", "/api/tags"),
    ("Generate", "/api/generate"),
    ("Chat", "/api/chat"),
)
for endpoint_label, endpoint_path in api_endpoints:
    print(f"  - {endpoint_label}: {OLLAMA_URL}{endpoint_path}")

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13281    0 13281    0     0  28273      0 --:--:-- --:--:-- --:--:-- 28257
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Collecting ollama
  Downloading ollama-0.4.9-py3-none-any.whl.metadata (4.3 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Downloading ollama-0.4.9-py3-none-any.whl (13 kB)
Downloading pyngrok-7.2.8-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok, ollama
Successfully installed ollama-0.4.9 py

In [None]:
# @title Install all RAG components
!pip install sentence-transformers fastapi uvicorn qdrant-client python-multipart aiofiles

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse
from qdrant_client import QdrantClient
from qdrant_client.http.models import PointStruct, VectorParams, Distance
from sentence_transformers import SentenceTransformer
import os, uuid, shutil, threading, uvicorn
from typing import List

app = FastAPI()
UPLOAD_FOLDER = "docs"
COLLECTION_NAME = "rag_documents"
# Vector size of the Qdrant collection; it must equal the embedding size of
# the model loaded below (384 for all-MiniLM-L6-v2).
EMBEDDING_DIM = 384

os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Real embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Qdrant connection
# The cluster URL and API key are credentials and must never be stored in the
# notebook: they are read from QDRANT_URL / QDRANT_API_KEY, falling back to an
# interactive prompt (hidden input for the key) when a variable is unset.
# Any key that was ever committed with this notebook should be treated as
# leaked and rotated in the Qdrant Cloud console.
from getpass import getpass

qdrant_url = os.environ.get("QDRANT_URL") or input("Qdrant Cloud URL: ")
qdrant_api_key = os.environ.get("QDRANT_API_KEY") or getpass("Qdrant API key: ")
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)

# Create the collection only when it does not exist yet.
# An explicit existence check replaces the former "bare except + recreate":
# that pattern dropped and rebuilt the collection on ANY error from the lookup
# (e.g. a network hiccup), destroying the stored vectors.
if not qdrant_client.collection_exists(COLLECTION_NAME):
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )
