# Local-LLM Tests and Examples

Choose a model from the models list and paste its name into the `model` variable on the API calls. The "Language Models" section below shows how to get the list of available models.

Install OpenAI and requests:

```bash
pip install openai requests
```

**Note: you do not need an OpenAI API key. The API key is your server's `LOCAL_LLM_API_KEY`, if you defined one in your `.env` file.**

## Global definitions and helpers


In [189]:
import openai
import requests
import time

# Connection settings: point these at your own server before running the notebook.
LOCAL_LLM_SERVER = "http://localhost:8091"
LOCAL_LLM_API_KEY = "Your LOCAL_LLM_API_KEY from your .env file"

# Defaults reused by the request cells further down.
DEFAULT_LLM = "zephyr-7b-beta"
SYSTEM_MESSAGE = "Act as a creative writer. All of your responses are transcribed to audio and sent to the user. Be concise with all responses. After each request is fulfilled, end with </s> before further explanation."
DEFAULT_MAX_TOKENS = 64
DEFAULT_TEMPERATURE = 0.3
DEFAULT_TOP_P = 0.90


# ------------------- DO NOT EDIT BELOW THIS LINE IN THIS CELL ------------------- #
# Send every `openai` client call to the local server's /v1/ endpoint.
openai.base_url = f"{LOCAL_LLM_SERVER}/v1/"
# Use the configured key; when it is empty, the server URL is used as a stand-in
# (presumably so the client always has a non-empty key -- confirm).
openai.api_key = LOCAL_LLM_API_KEY or LOCAL_LLM_SERVER
# Headers for the cells that call the server with plain `requests`.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"{LOCAL_LLM_API_KEY}",
}


def display_content(content):
    """Show any media referenced by a server response, then print its text.

    Behaviour, in order:

    * If IPython is not importable, ``content`` is printed and nothing else
      happens.
    * If ``content`` contains ``<audio controls>`` or contains no space, it is
      treated as (or as wrapping) base64 audio: the payload is decoded, written
      to a timestamped ``.wav`` file in the current directory and played.
      Content that turns out not to be decodable base64 (for example a
      one-word text reply) is skipped instead of raising.
    * If ``content`` contains a link under ``{LOCAL_LLM_SERVER}/outputs/``
      ending in ``.jpg``/``.png``, ``.mp4`` or ``.wav``, that link is removed
      from the text and shown as an image, video or audio player.
    * Finally the (possibly shortened) ``content`` is printed.

    Args:
        content: Response text from the server (a ``str``).

    Returns:
        None. Output goes to the notebook display and stdout.
    """
    outputs_url = f"{LOCAL_LLM_SERVER}/outputs/"
    try:
        from IPython.display import Audio, display, Image, Video
    except ImportError:
        # Not running with IPython available: plain text is all we can show.
        print(content)
        return
    if "<audio controls>" in content or " " not in content:
        import base64
        import binascii
        from datetime import datetime

        try:
            # Pull the payload out of an embedded data URI when there is one.
            audio_response = content.split("data:audio/wav;base64,")[1].split(
                '" type'
            )[0]
        except IndexError:
            # No data URI present: assume the whole string is the payload.
            audio_response = content
        try:
            audio_bytes = base64.b64decode(audio_response)
        except (binascii.Error, ValueError):
            # A space-free string that is not base64 (single word, bare URL,
            # non-ASCII text): there is no audio to play, so fall through.
            audio_bytes = b""
        if audio_bytes:
            file_name = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.wav"
            with open(file_name, "wb") as fh:
                fh.write(audio_bytes)
            display(Audio(filename=file_name, autoplay=True))
    if outputs_url in content:
        # The link is taken to run from the outputs prefix to the next quote.
        file_name = content.split(outputs_url)[1].split('"')[0]
        url = f"{outputs_url}{file_name}"
        if url.endswith((".jpg", ".png")):
            content = content.replace(url, "")
            display(Image(url=url))
        elif url.endswith(".mp4"):
            content = content.replace(url, "")
            display(Video(url=url, autoplay=True))
        elif url.endswith(".wav"):
            content = content.replace(url, "")
            print(f"URL: {url}")
            display(Audio(url=url, autoplay=True))
    print(content)

## Language Models

Get a list of models to choose from if you don't already know what model you want to use.


In [190]:
# Wait for server to come up instead of timing out.
# Poll the models endpoint once a second until it answers with HTTP 200.
# Only request-level failures (connection refused, timeout, ...) are retried,
# so interrupting the cell with Ctrl-C / "Interrupt kernel" still works.
while True:
    try:
        models = requests.get(
            f"{LOCAL_LLM_SERVER}/v1/models",
            headers=HEADERS,
            timeout=30,  # never let a single hung connection block forever
        )
        if models.status_code == 200:
            break
    except requests.exceptions.RequestException:
        # Server is not reachable yet; try again after the pause below.
        pass
    time.sleep(1)

print(models.json())

['bakllava-1-7b', 'llava-v1.5-7b', 'llava-v1.5-13b', 'yi-vl-6b', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Etheria-55b-v0.1', 'EstopianMaid-13B', 'Everyone-Coder-33B-Base', 'FusionNet_34Bx2_MoE', 'WestLake-7B-v2', 'WestSeverus-7B-DPO', 'DiscoLM_German_7b_v1', 'Garrulus', 'DareVox-7B', 'NexoNimbus-7B', 'Lelantos-Maid-DPO-7B', 'stable-code-3b', 'Dr_Samantha-7B', 'NeuralBeagle14-7B', 'tigerbot-13B-chat-v5', 'Nous-Hermes-2-Mixtral-8x7B-SFT', 'Thespis-13B-DPO-v0.7', 'Code-290k-13B', 'Nous-Hermes-2-Mixtral-8x7B-DPO', 'Venus-120b-v1.2', 'LLaMA2-13B-Estopia', 'medicine-LLM', 'finance-LLM-13B', 'Yi-34B-200K-DARE-megamerge-v8', 'phi-2-orange', 'laser-dolphin-mixtral-2x7b-dpo', 'bagel-dpo-8x7b-v0.2', 'Everyone-Coder-4x7b-Base', 'phi-2-electrical-engineering', 'Cosmosis-3x34B', 'HamSter-0.1', 'Helion-4x34B', 'Bagel-Hermes-2x34b', 'deepmoney-34b-200k-chat-evaluator', 'deepmoney-34b-200k-base', 'TowerInstruct-7B-v0.1', 'PiVoT-SUS-RP', 'Noromaid-v0.4-Mixtral-Ins

## Voices

Any `wav` file in the `voices` directory will be available to use as a voice.


In [191]:
# Ask the server which voices it exposes and print the JSON reply as-is.
voices_url = f"{LOCAL_LLM_SERVER}/v1/audio/voices"
voices = requests.get(voices_url, headers=HEADERS)
print(voices.json())

{'voices': ['default', 'DukeNukem', 'Hal9000_Mono', 'Hal_voice_9000_Synthetic', 'SyntheticStarTrekComputerVoice', 'Synthetic_DukeNukem', 'Synthetic_Female_Hybrid_4_Phonetics_0001', 'Synthetic_Female_Phonetics_0001']}


## Embeddings

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/embeddings)


In [192]:
# Text to embed; change it to get a different vector.
prompt = "Tacos are great."

# Request an embedding for the prompt and print the vector of the first result.
embedding_request = {"input": prompt, "model": DEFAULT_LLM}
response = openai.embeddings.create(**embedding_request)
print(response.data[0].embedding)

[0.6263254284858704, -8.25033950805664, -5.264184951782227, 5.946328163146973, 0.20876923203468323, -3.226299285888672, 4.364407539367676, -3.8509955406188965, -4.968028545379639, -1.3412210941314697, 0.28643423318862915, 0.972560703754425, 0.5281611680984497, 7.541004180908203, -11.472905158996582, 1.4199142456054688, 2.094740629196167, -4.318964958190918, 1.5097229480743408, -1.662405252456665, -1.6664537191390991, 0.39180639386177063, -1.926039457321167, 1.4305297136306763, -2.7235515117645264, -3.1501927375793457, -0.0675605908036232, -0.5283030867576599, -7.408465385437012, 0.6446126699447632, 7.874179840087891, -0.9269484281539917, 1.6204450130462646, 1.699781060218811, -0.5311153531074524, -4.224623680114746, -2.3510303497314453, 0.44627895951271057, -0.25287649035453796, -0.690187394618988, 3.0931148529052734, -9.374205589294434, 8.91468334197998, 0.025610078126192093, 6.076512336730957, -1.5289697647094727, -4.2362494468688965, -3.504779815673828, 2.98178768157959, -1.43094718

## Chat Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)


In [193]:
# User message for the chat request; change it to get a different reply.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."


# Single-turn, non-streaming chat request using the notebook defaults.
# The system message travels in `extra_body` rather than in `messages`.
chat_request = dict(
    model=DEFAULT_LLM,
    messages=[{"role": "user", "content": prompt}],
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    stream=False,
    extra_body={"system_message": SYSTEM_MESSAGE},
)
response = openai.chat.completions.create(**chat_request)
# NOTE(review): the reply is read from `response.messages[1]`, which is not a
# field of the standard OpenAI chat response -- presumably this server adds a
# `messages` list; confirm against the server's response format.
display_content(response.messages[1]["content"])

Crunchy shell,
   Spicy Doritos dance,
   Taco Bell bliss.


## Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/completions/create)


In [194]:
# Prompt for the plain completion request; change it to get a different reply.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."

# One non-streaming completion using the notebook defaults.
completion_request = dict(
    model=DEFAULT_LLM,
    prompt=prompt,
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    n=1,
    stream=False,
    extra_body={"system_message": SYSTEM_MESSAGE},
)
completion = openai.completions.create(**completion_request)
# Show the text of the first (and only requested) choice.
display_content(completion.choices[0].text)

Crunchy shell,
   Spicy Doritos dance,
   Taco Bell bliss.


## Cloning Text to Speech

Any `wav` file in the `voices` directory can be used as a voice.


In [195]:
# Text to be spoken by the selected voice.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."

# POST the text, voice name and language to the audio generation endpoint.
tts_payload = {
    "text": prompt,
    "voice": "DukeNukem",
    "language": "en",
}
response = requests.post(
    f"{LOCAL_LLM_SERVER}/v1/audio/generation",
    headers=HEADERS,
    json=tts_payload,
)
# Keep the parsed reply in `audio_response`; later cells reuse its "data" field.
audio_response = response.json()
display_content(audio_response["data"])

UklGRkYGAwBXQVZFZm10IBAAAAABAAEAwF0AAIC7AAACABAATElTVBoAAABJTkZPSVNGVA4AAABMYXZmNTguNzYuMTAwAGRhdGEABgMAAwD7/wsAFwAYACIAJgApACoALQA5AD4AQAA+AEMAQwBBAEYASgBJAEMASQBKAEsAQwBHAEEAPgA8AD0ANwA6ADsANAAyADkAPAA2ADYALwA0AC0ALQAuACoAKwAqACcAJwAlACkAJQAkAB4AIAAiAB8AHwAhAB4AHAAiAB0AHAATABcAFQAYABsAIAAiABcAIAAaABsAHQAeABUAGgAkACMAJQAcACAAHQAlACcAKQAnACUAKAAuACwAMAArACgAKQApADEAMQAnACsAKQArACoALgAzADAANgAzAC8ANAAyAC4AKwAsACwALQAoACkAJQArADEAKwAlACIAJAAeABwAHwAcACAAHQAaABYAGwAfABUAFAAWABAAFgAdAB4AIQAbABgAFgAbACMAIwAiAB0AHAAbABsAIAAjABoAIQAgABwAGgAkAB0AIwApACMAIAAjAB4AGAAWACIAIQAcAB0AGgAhABUAFAAdABUAFQAXABEADQAPAA4AFwARABQAFAANAA0AEwASAAwADAAOAA4ADQAGAAYAAwACAAAABwADAAIAAwACAAIA9v/u/+//9P/u/+v/4P/o/97/4f/p/+n/7f/k/+P/6//v//H/7P/t/+j/3f/h/+z/6v/l/+T/4v/i/+P/7f/t/+3/6//r/+j/7P/z//X/9//8/wAA//8GAPz/AgAAAAEAAwABAAMABgAEAAsADwAYABEAEAAbAAoAAAAAAPn/8/8BAAkAAAACAAwAEQAUAAgACAAEAA0ACAAGAAwAAgABAAcAAAAFAPX/9//3//z/AAD9//z//f8LAAgA+v//////BQAAAPv/DQAFAAwAAgD7/wAAAAD+/wIA7f/s//r//v/3/wIACAAFAAgABAD4//L/+f8EAP3/

## Speech to Text


In [196]:
# Send the audio produced by the previous cell back to the server to be transcribed.
stt_payload = {
    "file": audio_response["data"],
    "audio_format": "wav",
    "model": "base.en",
}
transcription = requests.post(
    f"{LOCAL_LLM_SERVER}/v1/audio/transcriptions",
    json=stt_payload,
    headers=HEADERS,
)


# Print the JSON reply from the transcription endpoint.
print(transcription.json())

{'data': " Write a haiku about Taco Bell's Doritos Locos Tacos."}


## Voice Completion Example


In [197]:
# Reuse the audio generated a couple of cells back as the prompt for a completion.
# The extra body carries the system message plus the audio format and voice name.
voice_options = {
    "system_message": SYSTEM_MESSAGE,
    "audio_format": "wav",
    "voice": "DukeNukem",
}
completion = openai.completions.create(
    model=DEFAULT_LLM,
    prompt=audio_response["data"],
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    top_p=DEFAULT_TOP_P,
    n=1,
    stream=False,
    extra_body=voice_options,
)

# Display the text of the first choice (media links in it are rendered too).
response_text = completion.choices[0].text
display_content(response_text)

URL: http://localhost:8091/outputs/10472795804f491db9f40c178035c5c9.wav


Crunchy shell,
   Spicy Doritos delight,
   Taco Bell bliss.

