# Ollama Model Server

### Install Ollama

In [None]:
!curl -fsSL https://ollama.com/install.sh | sh

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


### Check Installation

In [None]:
!ollama --version



### Run Ollama Server

In [54]:
import subprocess
import threading
import time
import requests
import json
import os

# Function to run Ollama in background
def run_ollama():
    # Set Ollama to listen on all interfaces
    env_vars = os.environ.copy()
    env_vars["OLLAMA_HOST"] = "0.0.0.0"
    env_vars["PATH"] = "/usr/local/bin:" + env_vars.get("PATH", "")

    # Use subprocess.Popen for a non-blocking background process
    # Redirect stdout and stderr to DEVNULL to keep the console clean
    subprocess.Popen(["/usr/local/bin/ollama", "serve"], env=env_vars, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

# Start Ollama in a separate thread
ollama_thread = threading.Thread(target=run_ollama, daemon=True)
ollama_thread.start()

# Wait for Ollama to start and become responsive
ollama_url = "http://127.0.0.1:11434"
max_retries = 60 # Increased retries (60 * 2 seconds = 120 seconds total wait)
for i in range(max_retries):
    try:
        response = requests.get(f"{ollama_url}/api/tags", timeout=1)
        if response.status_code == 200:
            print("Ollama server started and responsive.")
            break
    except requests.exceptions.ConnectionError:
        pass
    print(f"Waiting for Ollama server... ({i+1}/{max_retries})")
    time.sleep(2)
else:
    print("Ollama server did not become responsive in time.")

Waiting for Ollama server... (1/60)
Ollama server started and responsive.


In [55]:
!ps aux | grep ollama

root       40834  0.6  0.2 1856716 32268 ?       Sl   08:09   0:00 /usr/local/bin/ollama serve
root       40870  0.0  0.0   7376  3524 ?        S    08:09   0:00 /bin/bash -c ps aux | grep ollama
root       40872  0.0  0.0   6484  2488 ?        S    08:09   0:00 grep ollama


### Pull required ollama model

In [56]:
# !ollama pull llama3.1:8b
!ollama pull mistral:7b


[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


### Test model

In [49]:
!ollama run llama3.2:7b "Explain RAG in one paragraph"

[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?25l[A[1G[?25h[?2026l[?2026h[?2

In [57]:
!ss -tulnp | grep 11434


tcp   LISTEN 0      4096               *:11434            *:*    users:(("ollama",pid=40834,fd=3))      


In [None]:
# Download the latest Cloudflare Tunnel (cloudflared) binary for Linux
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64

# Make the downloaded cloudflared binary executable
!chmod +x cloudflared-linux-amd64

In [None]:
# Start a Cloudflare tunnel that exposes the local Ollama server
# running on port 11434 to a public HTTPS URL
!./cloudflared-linux-amd64 tunnel --url http://localhost:11434


[90m2025-12-31T08:09:36Z[0m [32mINF[0m Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
[90m2025-12-31T08:09:36Z[0m [32mINF[0m Requesting new quick Tunnel on trycloudflare.com...
[90m2025-12-31T08:09:39Z[0m [32mINF[0m +--------------------------------------------------------------------------------------------+
[90m2025-12-31T08:09:39Z[0m [32mINF[0m |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
[90m2025