# Local-LLM Tests and Examples

Choose a model from the models list and pass its name as the `model` argument in the API calls. You can get the list of available models below.

Install OpenAI and requests:

```bash
pip install openai requests
```

**Note: you do not need an OpenAI API key. The API key is the `LOCAL_LLM_API_KEY` for your server, if you defined one in your `.env` file.**

## Global definitions and helpers


In [153]:
import openai
import requests
import time

# Connection settings for this notebook: point LOCAL_LLM_SERVER at your server
# and set LOCAL_LLM_API_KEY to the key from your .env file.
LOCAL_LLM_SERVER = "http://localhost:8091"
LOCAL_LLM_API_KEY = "Your LOCAL_LLM_API_KEY from your .env file"

# Defaults shared by the example cells further down.
DEFAULT_LLM = "phi-2-dpo"
SYSTEM_MESSAGE = "Act as a creative writer. All of your responses are transcribed to audio and sent to the user. After each request is fulfilled, end with </s> before further explanation."
DEFAULT_MAX_TOKENS = 64
DEFAULT_TEMPERATURE = 0.3
DEFAULT_TOP_P = 0.9


# ------------------- DO NOT EDIT BELOW THIS LINE IN THIS CELL ------------------- #
# Send the openai client's requests to the server configured above.
openai.base_url = f"{LOCAL_LLM_SERVER}/v1/"
# When no API key is configured, the server URL is used as the key value instead.
openai.api_key = LOCAL_LLM_API_KEY or LOCAL_LLM_SERVER
# Headers attached to the direct `requests` calls in the cells below.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": str(LOCAL_LLM_API_KEY),
}


def display_content(content):
    """Render a server response in the notebook, then print its text.

    Handles two kinds of media found in ``content``:

    * Base64 WAV audio, either embedded in an ``<audio controls>`` tag as a
      ``data:audio/wav;base64,`` URI or sent as a bare string containing no
      spaces. The audio is written to a timestamped ``.wav`` file in the
      current directory and played.
    * A link under ``{LOCAL_LLM_SERVER}/outputs/`` ending in ``.jpg``,
      ``.png``, ``.mp4`` or ``.wav``. The media is displayed and the URL is
      removed from the text.

    Whatever remains of ``content`` is printed last. When IPython is not
    installed, ``content`` is simply printed unchanged.

    Args:
        content: Response text (str) returned by the server.

    Returns:
        None.
    """
    outputs_url = f"{LOCAL_LLM_SERVER}/outputs/"
    try:
        from IPython.display import Audio, display, Image, Video
    except ImportError:
        # No IPython available: plain text is all that can be shown.
        print(content)
        return
    if "<audio controls>" in content or " " not in content:
        import base64
        from datetime import datetime

        # Take the payload out of the data URI when there is one; otherwise
        # treat the whole string as the base64 payload.
        marker = "data:audio/wav;base64,"
        if marker in content:
            audio_response = content.split(marker)[1].split('" type')[0]
        else:
            audio_response = content
        # Decode before opening the file so that a space-free reply that is
        # not base64 (e.g. a one-word answer) neither raises nor leaves an
        # empty .wav behind. binascii.Error is a subclass of ValueError.
        try:
            audio_bytes = base64.b64decode(audio_response)
        except ValueError:
            audio_bytes = b""
        if audio_bytes:
            file_name = f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.wav"
            with open(file_name, "wb") as fh:
                fh.write(audio_bytes)
            display(Audio(filename=file_name, autoplay=True))
    if outputs_url in content:
        # The file name runs from the outputs prefix up to the next quote.
        file_name = content.split(outputs_url)[1].split('"')[0]
        url = f"{outputs_url}{file_name}"
        if url.endswith((".jpg", ".png")):
            content = content.replace(url, "")
            display(Image(url=url))
        elif url.endswith(".mp4"):
            content = content.replace(url, "")
            display(Video(url=url, autoplay=True))
        elif url.endswith(".wav"):
            content = content.replace(url, "")
            print(f"URL: {url}")
            display(Audio(url=url, autoplay=True))
    print(content)

## Language Models

Get a list of models to choose from if you don't already know what model you want to use.


In [154]:
# Wait for the server to come up instead of failing on the first attempt.
# Polls the models endpoint once per second until it answers with HTTP 200.
while True:
    try:
        # The timeout keeps a stalled connection from blocking the retry loop.
        models = requests.get(
            f"{LOCAL_LLM_SERVER}/v1/models", headers=HEADERS, timeout=30
        )
        if models.status_code == 200:
            break
    except requests.exceptions.RequestException:
        # Server not reachable yet; try again. Catching only request errors
        # lets a kernel interrupt (KeyboardInterrupt) stop the loop.
        pass
    time.sleep(1)

print(models.json())

['bakllava-1-7b', 'llava-v1.5-7b', 'llava-v1.5-13b', 'yi-vl-6b', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Etheria-55b-v0.1', 'EstopianMaid-13B', 'Everyone-Coder-33B-Base', 'FusionNet_34Bx2_MoE', 'WestLake-7B-v2', 'WestSeverus-7B-DPO', 'DiscoLM_German_7b_v1', 'Garrulus', 'DareVox-7B', 'NexoNimbus-7B', 'Lelantos-Maid-DPO-7B', 'stable-code-3b', 'Dr_Samantha-7B', 'NeuralBeagle14-7B', 'tigerbot-13B-chat-v5', 'Nous-Hermes-2-Mixtral-8x7B-SFT', 'Thespis-13B-DPO-v0.7', 'Code-290k-13B', 'Nous-Hermes-2-Mixtral-8x7B-DPO', 'Venus-120b-v1.2', 'LLaMA2-13B-Estopia', 'medicine-LLM', 'finance-LLM-13B', 'Yi-34B-200K-DARE-megamerge-v8', 'phi-2-orange', 'laser-dolphin-mixtral-2x7b-dpo', 'bagel-dpo-8x7b-v0.2', 'Everyone-Coder-4x7b-Base', 'phi-2-electrical-engineering', 'Cosmosis-3x34B', 'HamSter-0.1', 'Helion-4x34B', 'Bagel-Hermes-2x34b', 'deepmoney-34b-200k-chat-evaluator', 'deepmoney-34b-200k-base', 'TowerInstruct-7B-v0.1', 'PiVoT-SUS-RP', 'Noromaid-v0.4-Mixtral-Ins

## Voices

Any `wav` file in the `voices` directory will be available to use as a voice.


In [155]:
# Ask the server which voices it offers and show the JSON reply.
voices_url = f"{LOCAL_LLM_SERVER}/v1/audio/voices"
voices = requests.get(voices_url, headers=HEADERS)
print(voices.json())

{'voices': ['default', 'DukeNukem', 'Hal9000_Mono', 'Hal_voice_9000_Synthetic', 'SyntheticStarTrekComputerVoice', 'Synthetic_DukeNukem', 'Synthetic_Female_Hybrid_4_Phonetics_0001', 'Synthetic_Female_Phonetics_0001']}


## Embeddings

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/embeddings)


In [156]:
# Text to embed; change it to generate a different embedding.
prompt = "Tacos are great."

# Request an embedding for the prompt and print the first (only) vector.
response = openai.embeddings.create(model=DEFAULT_LLM, input=prompt)
print(response.data[0].embedding)

[0.3090707063674927, 1.1029287576675415, 2.3656630516052246, -0.4048413932323456, 1.0616954565048218, -0.38123324513435364, 1.000983476638794, 1.3528428077697754, -0.8648228645324707, 1.5190966129302979, 0.675586998462677, -0.19846737384796143, 0.5568936467170715, 0.06692102551460266, -0.31205207109451294, -0.31372374296188354, 0.7479310035705566, 1.7122050523757935, 0.11156223714351654, -0.38010597229003906, -2.0285604000091553, -0.9324357509613037, -0.26883846521377563, -0.41842585802078247, 0.7723556160926819, 1.2539050579071045, 0.4468786418437958, 1.082922339439392, 0.3584991693496704, -0.42915067076683044, -0.6801806092262268, -1.2013731002807617, 0.0065938811749219894, 0.5070878267288208, 0.6012413501739502, -0.36088356375694275, -0.8070286512374878, -0.42242664098739624, 1.5350435972213745, -1.8791346549987793, 0.43988990783691406, 0.705127477645874, 0.6156477332115173, 1.2618300914764404, -0.5392323732376099, 0.47247421741485596, 0.51447594165802, -0.5469154715538025, 0.130050

## Chat Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)


In [157]:
# Change this prompt to generate different outputs.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."

# Gather the request arguments first, then make a single non-streaming call.
chat_request = {
    "model": DEFAULT_LLM,
    "messages": [{"role": "user", "content": prompt}],
    "temperature": DEFAULT_TEMPERATURE,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "top_p": DEFAULT_TOP_P,
    "stream": False,
    # Extra field passed through `extra_body`.
    "extra_body": {"system_message": SYSTEM_MESSAGE},
}
response = openai.chat.completions.create(**chat_request)
# NOTE(review): reads entry 1 of a `messages` list on the response, presumably
# the assistant reply as returned by this server; confirm against the server.
display_content(response.messages[1]["content"])

Crunchy shells burst,
  Cheesy salsa and beef delight,
  Loco flavors reign supreme.


Taco Bell's Locos Tacos,
A fiery feast for the senses,
Spicy, savory, and bold.


## Completion

[OpenAI API Reference](https://platform.openai.com/docs/api-reference/completions/create)


In [158]:
# Change this prompt to generate different outputs.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."

# Gather the request arguments first, then ask for one non-streaming completion.
completion_request = {
    "model": DEFAULT_LLM,
    "prompt": prompt,
    "temperature": DEFAULT_TEMPERATURE,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "top_p": DEFAULT_TOP_P,
    "n": 1,
    "stream": False,
    # Extra field passed through `extra_body`.
    "extra_body": {"system_message": SYSTEM_MESSAGE},
}
completion = openai.completions.create(**completion_request)
display_content(completion.choices[0].text)

Crunchy shells burst,
  Cheesy goodness and salsa dance,
  Taco Bell delight.


Note that a haiku is a traditional form of Japanese poetry consisting of three lines with 5 syllables in the first line, 7 syllables in the second line, and 5 syllables in the third line


## Cloning Text to Speech

Any `wav` file in the `voices` directory can be used as a voice.


In [164]:
# Text to be spoken in the selected voice.
prompt = "Write a haiku about Taco Bell's Doritos Locos Tacos."

tts_payload = {
    "text": prompt,
    "voice": "DukeNukem",
    "language": "en",
}
response = requests.post(
    f"{LOCAL_LLM_SERVER}/v1/audio/generation",
    json=tts_payload,
    headers=HEADERS,
)
# Keep the parsed reply around: later cells reuse audio_response["data"].
audio_response = response.json()
display_content(audio_response["data"])

UklGRkb+AgBXQVZFZm10IBAAAAABAAEAwF0AAIC7AAACABAATElTVBoAAABJTkZPSVNGVA4AAABMYXZmNTguNzYuMTAwAGRhdGEA/gIAEQAIABgAKgA0AD0ATwBKAEAARABNAFAATgBQAE8ASQBKAFEATABEADYAOwA2ADEALgAqACYAIQAiACIAEgAMAAAA7//m/+D/3//S/8T/sf+d/5H/gP98/27/bv90/3T/dP9//4j/ff93/2j/aP9e/1j/Uf9S/1n/U/9k/2P/XP9d/27/d/90/3//iv+M/4v/nP+X/5b/of+h/5L/kf+p/6L/ov+c/5//ov+4/7r/wf/D/7z/yv/T/9b/1//V/9f/2//k//b/9v/t//T/9P/q/+H/7v/1//X/AAADAPr/CwAOAPn//f/+//v/7f/a/9j/1//t//b/8P/o/9v/4P/e/9z/7v/1//n/+//0//X/BAACAPj/8P/t/+L/3v/v/+n/5v/d/8//0P/Y/9j/1//L/7r/uf+w/63/qv+z/6j/qv+4/6j/qf+6/7n/wP/D/8D/vP++/7P/p/+d/6v/of+a/6H/ov+5/7f/xP/l/+v/AgAWACMAGAAcABsADwAYAPz/CwABAPv/DgAEAAcABQANABMAFgAjAB0AKQAwADMARABOAF4AVwBfAEsARwA4ADwAWQBPAFgAWwB7AIcAkQCkAKwAswCsAKoAtwC1AJ4AnACPAIoAkQCmAMoAywDeAPAA9wAbASIBIAEPAf4A8wDlANQAzQDEALgApQCiAKUAqACuAKsAsgCuALEAswC5AM0A3gDrAPkAGgFGATABIgEEAcQAnQCcAKQAngCPAGsAMwD6//H/1v+x/5L/hf+E/6T/p/+//9v/5P/d/9//zP+v/4r/aP9I/07/Kv8Y/xD/FP8m/yT/Ef8G/w//F/8j/xv/Hv8f/zn/Ov9B/1b/Yv9n/2j/df+V/67/vP/S/+//7/8DACAATQBiAHYAngCqAKIA

## Speech to Text


In [165]:
# Transcribe the audio produced by the previous cell.
transcription_payload = {
    "file": audio_response["data"],
    "audio_format": "wav",
    "model": "base.en",
}
transcription = requests.post(
    f"{LOCAL_LLM_SERVER}/v1/audio/transcriptions",
    headers=HEADERS,
    json=transcription_payload,
)

print(transcription.json())

{'data': " Write a haiku about Taco Bell's Doritos Locos Tacos."}


## Voice Completion Example


In [166]:
# Reuse the audio generated a couple of cells back as the completion prompt.
voice_request = {
    "model": DEFAULT_LLM,
    "prompt": audio_response["data"],
    "temperature": DEFAULT_TEMPERATURE,
    "max_tokens": DEFAULT_MAX_TOKENS,
    "top_p": DEFAULT_TOP_P,
    "n": 1,
    "stream": False,
    # Extra fields passed through `extra_body`.
    "extra_body": {
        "system_message": SYSTEM_MESSAGE,
        "audio_format": "wav",
        "voice": "DukeNukem",
    },
}
completion = openai.completions.create(**voice_request)

response_text = completion.choices[0].text
display_content(response_text)

URL: http://localhost:8091/outputs/bf41df6970294b789277ab6947e2dca7.wav


Crunchy shells, hot, spicy,
  Doritos Locos Tacos, the bite,
  A flavor explosion.


Taco Bell's Locos Tacos,
The crunch of the shell and spice,
A taste that can't be missed!

