In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

# Some things this notebook demonstrates:
- creating models for use completing prompts
- creating prompts
  - as templates
  - for use with both chat based models as well as ordinary completion
- basic use of completions
  - naming completions
  - limiting the number of tokens
- writing control-flow around prompts and completions
  - swapping models
    - even between chat and non-chat models
  - manipulating a prompt between completions
  - tool-choice
- constraints
  - using a simple options constrain with an enum
- streaming completion output
- completion confidence

In [2]:
from enum import Enum
import openai

from keymaker.models import chatgpt, gpt4, LlamaCpp
from keymaker.constraints import RegexConstraint, OptionsConstraint
from keymaker import Prompt, Completion

import json

with open("config.json") as f:
    openai.api_key = json.loads(f.read())["OPENAI_API_KEY"]

chat_model = chatgpt()

<IPython.core.display.Javascript object>

In [3]:
user_msg = "The weather will be really bad tomorrow what should I wear?"

<IPython.core.display.Javascript object>

In [4]:
prompt_template = f"""
%system%You are an AI assistant that chooses tools for tasks based on a user task and tool descriptions.
Here are the tools you can choose from:
    Tool A: Looks up information on animals
    Tool B: Finds peoples geolocation based on triangulating their phone data
    Tool C: Finds the best outfit to wear today based on the weather
%/system%
%user%{user_msg}
Explain your reasoning in 50 words or less and then choose the appropriate tool.
%/user%
"""

<IPython.core.display.Javascript object>

In [5]:
async def print_stream(completion: Completion):
    print(repr(completion))

<IPython.core.display.Javascript object>

In [6]:
prompt = Prompt(prompt_template)

<IPython.core.display.Javascript object>

In [7]:
prompt = await prompt.complete(
    model=chat_model, max_tokens=100, stream=print_stream, name="explanation"
)

Completion(text='Based', start=502, stop=507, name=explanation, chunk=True, score=None)
Completion(text=' on', start=507, stop=510, name=explanation, chunk=True, score=None)
Completion(text=' the', start=510, stop=514, name=explanation, chunk=True, score=None)
Completion(text=' user', start=514, stop=519, name=explanation, chunk=True, score=None)
Completion(text=''s', start=519, stop=521, name=explanation, chunk=True, score=None)
Completion(text=' request', start=521, stop=529, name=explanation, chunk=True, score=None)
Completion(text=',', start=529, stop=530, name=explanation, chunk=True, score=None)
Completion(text=' the', start=530, stop=534, name=explanation, chunk=True, score=None)
Completion(text=' appropriate', start=534, stop=546, name=explanation, chunk=True, score=None)
Completion(text=' tool', start=546, stop=551, name=explanation, chunk=True, score=None)
Completion(text=' to', start=551, stop=554, name=explanation, chunk=True, score=None)
Completion(text=' use', start=554, 

<IPython.core.display.Javascript object>

In [10]:
prompt.completions.explanation

Completion(text='Based on the user's request, the appropriate tool to use would be Tool C, which finds the best outfit to wear based on the weather. This tool will analyze the weather conditions for tomorrow and provide recommendations on what to wear to stay comfortable and protected from the bad weather.', start=502, stop=792, name=explanation, chunk=False, score=None)

<IPython.core.display.Javascript object>

In [11]:
llama_model = LlamaCpp(
    model_path="/Users/nick/Downloads/llama-2-7b-chat.ggmlv3.q3_K_S.bin"
)

llama.cpp: loading model from /Users/nick/Downloads/llama-2-7b-chat.ggmlv3.q3_K_S.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 11 (mostly Q3_K - Small)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 4603.09 MB (+ 1026.00 MB per state)
llama_new_context_with_model: kv self size  =  256.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 


<IPython.core.display.Javascript object>

In [12]:
constraint = OptionsConstraint({"A", "B", "C"})

<IPython.core.display.Javascript object>

In [13]:
llama_prompt = (
    Prompt(prompt.completions.explanation)
    + """
User: Based on this explanation which tool should be chosen?
Assistant: Tool """
)

<IPython.core.display.Javascript object>

In [14]:
choice_prompt = await llama_prompt.complete(model=llama_model, constraint=constraint)


llama_print_timings:        load time = 10224.36 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1457.73 tokens per second)
llama_print_timings: prompt eval time = 10224.33 ms /    79 tokens (  129.42 ms per token,     7.73 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 10327.40 ms


<IPython.core.display.Javascript object>

In [15]:
choice_prompt.completions

Completions([Completion(text='C', start=368, stop=369, name=None, chunk=False, score=0.9989875090832618)], {})

<IPython.core.display.Javascript object>

In [16]:
class ToolKinds(Enum):
    A = "A"
    B = "B"
    C = "C"

<IPython.core.display.Javascript object>

In [17]:
async def choose_a_tool(prompt, choices_enum: Enum) -> ToolKinds:
    constraint = OptionsConstraint({e.value for e in choices_enum})
    return choices_enum(
        (await prompt.complete(model=llama_model, constraint=constraint)).completions[0]
    )

<IPython.core.display.Javascript object>

In [18]:
tool_choice: ToolKinds = await choose_a_tool(llama_prompt, ToolKinds)

Llama.generate: prefix-match hit

llama_print_timings:        load time = 10224.36 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1459.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   839.66 ms /     1 runs   (  839.66 ms per token,     1.19 tokens per second)
llama_print_timings:       total time =   845.45 ms


<IPython.core.display.Javascript object>

In [19]:
tool_choice

<ToolKinds.C: 'C'>

<IPython.core.display.Javascript object>

In [20]:
llama_model.tokens

{0: ' ⁇ ',
 4: '\x01',
 5: '\x02',
 6: '\x03',
 7: '\x04',
 8: '\x05',
 9: '\x06',
 10: '\x07',
 11: '\x08',
 12: '\t',
 13: '\n',
 14: '\x0b',
 15: '\x0c',
 16: '\r',
 17: '\x0e',
 18: '\x0f',
 19: '\x10',
 20: '\x11',
 21: '\x12',
 22: '\x13',
 23: '\x14',
 24: '\x15',
 25: '\x16',
 26: '\x17',
 27: '\x18',
 28: '\x19',
 29: '\x1a',
 30: '\x1b',
 31: '\x1c',
 32: '\x1d',
 33: '\x1e',
 34: '\x1f',
 35: ' ',
 36: '!',
 37: '"',
 38: '#',
 39: '$',
 40: '%',
 41: '&',
 42: "'",
 43: '(',
 44: ')',
 45: '*',
 46: '+',
 47: ',',
 48: '-',
 49: '.',
 50: '/',
 51: '0',
 52: '1',
 53: '2',
 54: '3',
 55: '4',
 56: '5',
 57: '6',
 58: '7',
 59: '8',
 60: '9',
 61: ':',
 62: ';',
 63: '<',
 64: '=',
 65: '>',
 66: '?',
 67: '@',
 68: 'A',
 69: 'B',
 70: 'C',
 71: 'D',
 72: 'E',
 73: 'F',
 74: 'G',
 75: 'H',
 76: 'I',
 77: 'J',
 78: 'K',
 79: 'L',
 80: 'M',
 81: 'N',
 82: 'O',
 83: 'P',
 84: 'Q',
 85: 'R',
 86: 'S',
 87: 'T',
 88: 'U',
 89: 'V',
 90: 'W',
 91: 'X',
 92: 'Y',
 93: 'Z',
 94: '['

<IPython.core.display.Javascript object>