# ezlocalai Tests and Examples

Choose a model from the models list and pass its name as the `model` argument in the API calls. A cell below shows how to fetch the list of available models.

Install the `openai`, `requests`, and `python-dotenv` packages:

```bash
pip install openai requests python-dotenv
```

**Note: you do not need an OpenAI API key. The API key is the `EZLOCALAI_API_KEY` for your server, if you defined one in your `.env` file.**

## Global definitions and helpers

Confirm that `DEFAULT_LLM` in your `.env` file is set to the model you want to use; the cell below reads it with `os.getenv("DEFAULT_LLM", ...)`.


In [None]:
import openai
import requests
import time
import os
import re
from dotenv import load_dotenv

# Read settings from a `.env` file, if there is one, before any os.getenv call.
load_dotenv()

# Set your system message, max tokens, temperature, and top p here, or use the defaults.
SYSTEM_MESSAGE = (
    "The assistant is acting as a creative writer. "
    "All of your text responses are transcribed to audio and sent to the user. "
    "Be concise with all responses. "
    "After the request is fulfilled, end with </s>."
)
DEFAULT_MAX_TOKENS = 256
DEFAULT_TEMPERATURE = 0.5
DEFAULT_TOP_P = 0.9

# ------------------- DO NOT EDIT BELOW THIS LINE IN THIS CELL ------------------- #
# Connection settings: taken from the environment, with local defaults.
EZLOCALAI_SERVER = os.getenv("EZLOCALAI_SERVER", "http://localhost:8091")
EZLOCALAI_API_KEY = os.getenv("EZLOCALAI_API_KEY", "none")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "unsloth/Qwen3-VL-4B-Instruct-GGUF")

# Point the module-level OpenAI client at the ezlocalai server.
openai.base_url = EZLOCALAI_SERVER + "/v1/"
# If the key is set but empty, the server URL is used as the key instead.
openai.api_key = EZLOCALAI_API_KEY or EZLOCALAI_SERVER

# Headers sent with the direct `requests` calls in the cells below.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": EZLOCALAI_API_KEY,
    "ngrok-skip-browser-warning": "true",
}


def display_content(content):
    """Print ``content`` and render any ezlocalai output media it links to.

    Links under ``{EZLOCALAI_SERVER}/outputs/`` are rendered inline by file
    extension: ``.jpg``/``.png`` as an image, ``.mp4`` as an autoplaying
    video, and ``.wav`` as autoplaying audio. Each rendered link is removed
    from the text, and whatever text remains is printed afterwards. Links
    with any other extension are left in the printed text.

    If IPython cannot be imported, ``content`` is printed unchanged.

    Args:
        content: Response text that may contain output URLs.
    """
    outputs_url = f"{EZLOCALAI_SERVER}/outputs/"
    try:
        from IPython.display import Audio, display, Image, Video
    except ImportError:
        # No IPython available: plain text is all that can be shown.
        print(content)
        return

    # Responses may carry links using the default local address; rewrite them
    # so they point at the server this notebook is actually configured for.
    default_outputs_url = "http://localhost:8091/outputs/"
    if outputs_url != default_outputs_url:
        content = content.replace(default_outputs_url, outputs_url)

    # A link ends at the first quote, whitespace, or ")" (the latter so that a
    # markdown-style "(url)" wrapper does not leak into the file name).
    link_pattern = re.escape(outputs_url) + r"[^\"'\s)]+"
    # dict.fromkeys drops duplicates while keeping first-seen order, so a link
    # repeated in the text is rendered only once.
    for link in dict.fromkeys(re.findall(link_pattern, content)):
        # Media is addressed by bare file name directly under /outputs/.
        file_name = link.rsplit("/", 1)[-1]
        url = f"{outputs_url}{file_name}"
        if url.endswith((".jpg", ".png")):
            media = Image(url=url)
        elif url.endswith(".mp4"):
            media = Video(url=url, autoplay=True)
        elif url.endswith(".wav"):
            media = Audio(url=url, autoplay=True)
        else:
            # Unknown type: leave the link in the printed text.
            continue
        # Remove the link exactly as it appeared in the text.
        content = content.replace(link, "")
        display(media)
    print(content)

## Language Models

Get a list of models to choose from if you don't already know what model you want to use.


In [None]:
# Wait for server to come up instead of timing out.
# Polls the models endpoint once per second until it answers with HTTP 200.
while True:
    try:
        # The per-request timeout stops a single stalled connection from
        # blocking forever; the surrounding loop still waits indefinitely.
        models = requests.get(
            f"{EZLOCALAI_SERVER}/v1/models", headers=HEADERS, timeout=10
        )
        if models.status_code == 200:
            break
    except requests.exceptions.RequestException:
        # Server not reachable yet. Only request errors are ignored, so
        # interrupting the cell (KeyboardInterrupt) still stops the loop.
        pass
    time.sleep(1)
print(models.json())

## Voices

Any `wav` file in the `voices` directory will be available to use as a voice.


In [None]:
# Fetch the voices endpoint and print its JSON body.
voices_endpoint = f"{EZLOCALAI_SERVER}/v1/audio/voices"
voices = requests.get(voices_endpoint, headers=HEADERS)
print(voices.json())

## Vision Test


In [None]:
# One user message that pairs a text instruction with an image URL.
vision_image_url = "https://www.visualwatermark.com/images/add-text-to-photos/add-text-to-image-3.webp"
vision_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe each stage of this image."},
        {"type": "image_url", "image_url": {"url": vision_image_url}},
    ],
}

# DEFAULT_LLM needs to be a vision-capable model here; the built-in default
# is a Qwen3-VL build.
response = openai.chat.completions.create(
    model=DEFAULT_LLM,
    messages=[vision_message],
    max_tokens=DEFAULT_MAX_TOKENS,
    temperature=DEFAULT_TEMPERATURE,
    top_p=DEFAULT_TOP_P,
)
vision_reply = response.choices[0].message.content
display_content(vision_reply)

## Chat Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)


In [None]:
# Modify this prompt to generate different outputs
prompt = "Write a short poem about Pikachu with a picture."

# The system message is passed through `extra_body` rather than as a
# "system" entry in `messages`.
chat_request = dict(
    model=DEFAULT_LLM,
    messages=[{"role": "user", "content": prompt}],
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    stream=False,
    extra_body={"system_message": SYSTEM_MESSAGE},
)
response = openai.chat.completions.create(**chat_request)

chat_reply = response.choices[0].message.content
display_content(chat_reply)

## Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/completions/create)


In [None]:
# Modify this prompt to generate different outputs
prompt = "Write a haiku about the future."

# Plain (non-chat) completion: a single choice, returned in one piece.
completion_request = dict(
    model=DEFAULT_LLM,
    prompt=prompt,
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    n=1,
    stream=False,
    extra_body={"system_message": SYSTEM_MESSAGE},
)
completion = openai.completions.create(**completion_request)

completion_text = completion.choices[0].text
display_content(completion_text)

## Cloning Text to Speech

Any `wav` file in the `voices` directory can be used as a voice.


In [None]:
from pathlib import Path
import base64
import IPython.display as ipd

# This text is passed verbatim as the speech `input`.
prompt = "Write a short poem about vikings with a picture."

# The generated speech is stored at ./temp/test-speech.wav; `audio_path` is
# reused by later cells.
os.makedirs("temp", exist_ok=True)
audio_path = os.path.join(os.getcwd(), "temp", "test-speech.wav")
speech_file_path = Path(audio_path)

tts_response = openai.audio.speech.create(
    model="tts-1",
    voice="DukeNukem",
    input=prompt,
    extra_body={"language": "en"},
)

# The response body is base64-decoded into raw audio bytes and written once.
audio_content = base64.b64decode(tts_response.content)
speech_file_path.write_bytes(audio_content)

ipd.Audio(speech_file_path)

## Audio to Text


In [None]:
# Transcribe the speech file saved at `audio_path`.
with open(audio_path, mode="rb") as audio_file:
    transcription = openai.audio.transcriptions.create(
        file=audio_file,
        model="base",
    )

print(transcription.text)

## Upload a Voice


In [None]:
# Reuse the shared headers minus the JSON content type, since this request
# sends a file upload rather than a JSON body.
upload_headers = dict(HEADERS)
upload_headers.pop("Content-Type")

voices_endpoint = f"{EZLOCALAI_SERVER}/v1/audio/voices"
with open(audio_path, "rb") as audio_file:
    # Upload the saved speech file under the voice name "Test".
    data = {"voice": "Test"}
    files = {"file": ("test-speech.wav", audio_file, "audio/wav")}
    response = requests.post(
        voices_endpoint,
        headers=upload_headers,
        data=data,
        files=files,
    )
    print(response.json())

## Voice Completion Example


In [None]:
# We will use the audio response from a couple of cells back.
# The TTS response body is decoded as UTF-8 text and sent as the prompt.
spoken_prompt = tts_response.content.decode("utf-8")
voice_options = {
    "system_message": SYSTEM_MESSAGE,
    "audio_format": "wav",
    "voice": "DukeNukem",
}

completion = openai.completions.create(
    model=DEFAULT_LLM,
    prompt=spoken_prompt,
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    n=1,
    stream=False,
    extra_body=voice_options,
)

response_text = completion.choices[0].text
display_content(response_text)

## Generate an Image


In [None]:
# Request an image as a URL, then hand that URL to display_content.
prompt = "Generate an image of a cat."
image_request = dict(
    prompt=prompt,
    model="ByteDance/SDXL-Lightning",
    response_format="url",
)
response = openai.images.generate(**image_request)

first_image = response.data[0]
image = first_image.url
display_content(image)