## Model Download

In [1]:
#!/usr/bin/env python3
# Ultra‑simple Ollama loop (no argparse, no installs).
# Assumes:
#   - Ollama is already installed
#   - Model (e.g. gpt-oss:20b) already pulled:  ollama pull gpt-oss:20b
# Usage:
#   python ollama_query.py
#   Then type prompts; type exit or quit (or empty line) to stop.

import subprocess
import sys

MODEL = "gpt-oss:20b"  # default Ollama model tag used by stream() and repl()


def stream(prompt: str, model: str = MODEL):
    """Run ``ollama run <model> <prompt>`` and echo its output as it arrives.

    The child's stderr is merged into its stdout, so diagnostics appear inline
    with the model's response.

    Args:
        prompt: Text passed to ``ollama run`` as a single argument.
        model: Ollama model tag; defaults to ``MODEL``.

    Returns:
        None. A non-zero exit status is reported on ``sys.stderr``, not raised.

    Raises:
        FileNotFoundError: If the ``ollama`` executable is not on ``PATH``.
    """
    # The context manager closes the stdout pipe and reaps the child on every
    # exit path, so no file descriptor or zombie process is left behind.
    with subprocess.Popen(
        ["ollama", "run", model, prompt],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line-buffered on our side of the pipe
    ) as p:
        try:
            for line in p.stdout:  # type: ignore
                # flush so lines show up immediately even when our own stdout
                # is not a terminal (e.g. a notebook or a pipe)
                print(line, end="", flush=True)
        except BaseException:
            # Interrupted (Ctrl-C, kernel interrupt, broken pipe, ...): stop
            # the child instead of blocking until it finishes on its own.
            p.terminate()
            raise
    if p.returncode != 0:
        print(f"\n[error] ollama exited with code {p.returncode}", file=sys.stderr)

def repl():
    """Interactive prompt loop: read a line, send it to the model, repeat.

    The loop ends on an empty line, on ``exit`` / ``quit`` (case-insensitive),
    or when input is closed or interrupted (EOF / Ctrl-C).
    """
    quit_words = ("exit", "quit")
    print(f"Ollama model: {MODEL}")
    while True:
        try:
            text = input("> ").strip()
        except (EOFError, KeyboardInterrupt):
            # leave the terminal on a fresh line before returning
            print()
            return
        wants_out = text.lower() in quit_words
        if wants_out or text == "":
            return
        stream(text)
        # blank line separating this response from the next prompt
        print()

# Entry point: start the interactive loop when this code is executed directly
# (a notebook cell also runs with __name__ == "__main__"), not when imported.
if __name__ == "__main__":
    repl()
    

Ollama model: gpt-oss:20b


KeyboardInterrupt: 

In [6]:
import os

In [None]:
# Download Ollama's install script and pipe it straight into `sh`.
# The exit status returned by os.system is not checked, so a failed install
# does not stop the notebook.
os.system("curl -fsSL https://ollama.com/install.sh | sh")

In [None]:
# Launch the Ollama server; the trailing `&` backgrounds it in the shell so the
# call returns immediately. Nothing here waits for the server to become ready,
# and if a server is already listening on 127.0.0.1:11434 this second instance
# fails with "address already in use" (see the recorded output of this cell).
os.system("ollama serve &")
# Send one prompt to the model through IPython's `!` shell escape.
# NOTE(review): the leading "/" in the prompt looks unintentional — confirm.
!ollama run 'gpt-oss:20b' /what is a cow

[?2026h[?25l[1G⠋ [K[?25h[?2026l

Error: listen tcp 127.0.0.1:11434: bind: address already in use


[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠴ [K[?25h[?2026l[?2026h[?25l[1G⠧ [K[?25h[?2026l[?2026h[?25l[1G⠧ [K[?25h[?2026l[?2026h[?25l[1G⠏ [K[?25h[?2026l[?2026h[?25l[1G⠏ [K[?25h[?2026l[?2026h[?25l[1G⠋ [K[?25h[?2026l[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l[?2026h[?25l[1G⠧ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l[?2026h[?25l[1G⠏ [K[?25h[?2026l[?2026h[?25l[1G⠙ [K[?25h[?2026l[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l[?2026h[?25l[1G⠧ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l

: 

## Inference

In [1]:
import subprocess
import sys

In [2]:
def query_ollama_streaming(prompt, model="gpt-oss:20b"):
    """Run ``ollama run <model> <prompt>`` and stream its stdout in real time.

    The child's stderr is captured separately and only shown (on
    ``sys.stderr``) when the command exits with a non-zero status.

    Args:
        prompt: Text passed to ``ollama run`` as a single argument.
        model: Ollama model tag to run.

    Returns:
        The exit status of the ``ollama`` process.

    Raises:
        FileNotFoundError: If the ``ollama`` executable is not on ``PATH``.
    """
    import tempfile  # local import keeps this cell self-contained

    # stderr is spooled to a temporary file rather than a pipe: a pipe that is
    # only read after stdout is exhausted can fill up and block the child,
    # deadlocking the stdout loop below.
    with tempfile.TemporaryFile() as err_file:
        # The context manager closes the stdout pipe and reaps the child on
        # every exit path.
        with subprocess.Popen(
            ['ollama', 'run', model, prompt],
            stdout=subprocess.PIPE,
            stderr=err_file,
            text=True,
            bufsize=1,  # Line buffered
        ) as process:
            try:
                # Stream stdout in real-time
                for line in process.stdout:
                    print(line, end='', flush=True)  # Print without adding extra newline
            except BaseException:
                # Interrupted (e.g. kernel interrupt): stop the child instead
                # of leaving it running in the background.
                process.terminate()
                raise

        # Handle any errors
        if process.returncode != 0:
            err_file.seek(0)
            # The file holds raw bytes; decode leniently so stray bytes in the
            # diagnostics cannot raise while reporting the failure.
            stderr_output = err_file.read().decode(errors="replace")
            print(f"\nError: {stderr_output}", file=sys.stderr)

    return process.returncode

In [None]:
# Try the helper with a sample prompt; the response is printed as it streams
# and the returned exit status becomes the cell's result.
query_ollama_streaming("Explain machine learning in simple terms")

KeyboardInterrupt: 