<a href="https://colab.research.google.com/github/Kelv-py/colabollama/blob/main/ollama_colab_runner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Ollama Colab Runner**
# <img src='https://ollama.com/public/ollama.png' alt="Ollama"/>
For best results, select a runtime with a GPU:<br>
T4 on the free tier, A100/L4 for paid subscribers.<br><br>
Run each of the 3 setup cells before running your prompt.<br>
If you interrupt execution, start the server again.

In [None]:
# @title Install components
!curl https://ollama.ai/install.sh | sh
!pip install ollama

!echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
!sudo apt-get update && sudo apt-get install -y cuda-drivers

import os
# Set LD_LIBRARY_PATH so the system NVIDIA library
os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 13281    0 13281    0     0  59040      0 --:--:-- --:--:-- --:--:-- 59290
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Collecting ollama
  Downloading ollama-0.4.8-py3-none-any.whl.metadata (4.7 kB)
Downloading ollama-0.4.8-py3-none-any.whl (13 kB)
Installing collected packages: ollama
Successfully installed ollama-0.4.8
Get:1 http://security.ubuntu.com/ubuntu jammy-security InReleas

In [None]:
# @title Start server
import subprocess

# Re-running this cell must replace the server, not stack a second one on top:
# if a previous run left `ollama serve` alive, stop it before launching again.
_previous_server = globals().get('proccess')
if _previous_server is not None and _previous_server.poll() is None:
    _previous_server.terminate()
    try:
        _previous_server.wait(timeout=10)
    except subprocess.TimeoutExpired:
        # The old server ignored the polite request; force it down.
        _previous_server.kill()

# Launch `ollama serve` in the background (Popen returns immediately).
# The handle keeps its original, misspelled name so existing references still resolve.
proccess = subprocess.Popen(['ollama', 'serve'])

In [None]:
import subprocess
import threading
import time
from pyngrok import ngrok

# Set ngrok auth token (get one from ngrok.com).
# The token is deliberately NOT stored in the notebook: it is read from the
# NGROK_AUTHTOKEN environment variable, or asked for with a hidden prompt.
# NOTE(review): a literal token used to be committed on this line; treat it as
# leaked and revoke it in the ngrok dashboard.
import os
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

# Start Ollama server with external binding
def start_ollama_server():
    """Run ``ollama serve`` with OLLAMA_HOST set to 0.0.0.0:11434.

    The call blocks until the ``ollama`` process exits, so it is meant to be
    used as a thread target rather than called directly from a cell.

    NOTE(review): the "Start server" cell also launches ``ollama serve``; if
    that instance is still running, this one presumably cannot bind port
    11434 and exits straight away -- confirm which server should own the port.
    """
    # Imported locally so the function works even when the cell that imports
    # `os` at module level has not been run in the current kernel session.
    import os

    env = os.environ.copy()  # copy: leave the kernel's own environment untouched
    env['OLLAMA_HOST'] = '0.0.0.0:11434'  # Bind to all interfaces
    subprocess.run(['ollama', 'serve'], env=env)

# Run the blocking server call on a daemon thread so this cell can continue
server_thread = threading.Thread(target=start_ollama_server, daemon=True)
server_thread.start()

# Pause for a fixed 10 seconds before moving on to the tunnel setup
time.sleep(10)

# Create public tunnel
try:
    # `public_url` keeps holding whatever ngrok.connect() returns (name unchanged
    # for other cells). For printing, take the address from its `.public_url`
    # attribute when it has one, so the lines below show a plain URL rather than
    # the string form of the returned object.
    public_url = ngrok.connect(11434)
    tunnel_url = getattr(public_url, "public_url", public_url)
    print(f"🚀 Ollama is now publicly accessible at: {tunnel_url}")
    print(f"📋 Use this URL in your external applications: {tunnel_url}")
    print(f"🔗 API endpoint: {tunnel_url}/api/generate")
except Exception as e:
    # Best effort: report the problem and let the rest of the cell keep running.
    print(f"❌ Error creating tunnel: {e}")
    print("💡 You may need to sign up for ngrok and set your auth token")

# Pick the Ollama model tag to use and download it onto this runtime.
# @title Select your model
# `model` is read again further down, by ollama.chat() and by the external API test.
model = "deepseek-r1:7b" # @param ["deepseek-r1:1.5b","deepseek-r1:7b","deepseek-r1:14b","deepseek-r1:32b","deepseek-r1:70b","deepseek-coder:1.3b","deepseek-coder:6.7b","deepseek-coder:33b","gemma3:12b","gemma3:27b","llama3.3:70b","mistral:7b","phi4:14b","qwen2.5:7b","qwen2.5:14b","qwen2.5:32b","qwen2.5-coder:7b","qwen2.5-coder:14b","qwen2.5-coder:32b"]
# {model} is substituted with the Python value before the shell command runs.
!ollama pull {model}

# Send one user message to the selected model and render the reply as Markdown
# @title Interacting with the model
question = "" # @param {"type":"string"}
from IPython.display import display, Markdown
import ollama

# A single-turn conversation: just the user's question
user_message = {'role': 'user', 'content': question}
response = ollama.chat(model, messages=[user_message])

reply_text = response['message']['content']
display(Markdown(reply_text))

# Cell 5: Test external API access
# @title Test External API Access
import requests
import json

def test_external_api():
    """POST a fixed test prompt to Ollama through the first active ngrok tunnel.

    Prints one of: a success message with the first 100 characters of the
    reply, the HTTP status and body on a non-200 answer, a notice that no
    tunnel is open, or the text of any exception raised. Returns None.
    """
    try:
        # Ask ngrok which tunnels are currently open
        active_tunnels = ngrok.get_tunnels()
        if not active_tunnels:
            print("❌ No active ngrok tunnels found")
            return

        base_url = active_tunnels[0].public_url

        # Non-streaming generate request against the tunnelled server
        payload = {
            "model": model,
            "prompt": "Hello, this is a test from external API",
            "stream": False,
        }
        reply = requests.post(f"{base_url}/api/generate", json=payload, timeout=30)

        if reply.status_code != 200:
            print(f"❌ API test failed: {reply.status_code} - {reply.text}")
            return

        body = reply.json()
        print("✅ External API test successful!")
        print(f"Response: {body['response'][:100]}...")
    except Exception as e:
        print(f"❌ Error testing external API: {e}")

test_external_api()

In [None]:
# @title Select your model
# The chosen tag is stored in `model` and downloaded by the shell command below.
# NOTE(review): an earlier cell also assigns `model` (there to "deepseek-r1:7b");
# whichever of the two ran last decides which model the chat cells use.
model = "mistral:7b" # @param ["deepseek-r1:1.5b","deepseek-r1:7b","deepseek-r1:14b","deepseek-r1:32b","deepseek-r1:70b","deepseek-coder:1.3b","deepseek-coder:6.7b","deepseek-coder:33b","gemma3:12b","gemma3:27b","llama3.3:70b","mistral:7b","phi4:14b","qwen2.5:7b","qwen2.5:14b","qwen2.5:32b","qwen2.5-coder:7b","qwen2.5-coder:14b","qwen2.5-coder:32b"]
# {model} is substituted with the Python value before the shell command runs.
!ollama pull {model}

In [None]:
# @title Interacting with the model
question = "" # @param {"type":"string"}
from IPython.display import display, Markdown
import ollama

# Single-turn chat: the form's question is the only message sent
chat_messages = [{'role': 'user', 'content': question}]
response = ollama.chat(model, messages=chat_messages)

# Render the model's answer as Markdown rather than plain text
answer_markdown = Markdown(response['message']['content'])
display(answer_markdown)