In [1]:
import os
from enum import Enum
from typing import Type

import openai

from keymaker import Prompt, Completion
from keymaker.constraints import RegexConstraint, OptionsConstraint
from keymaker.models import chatgpt, gpt4, LlamaCpp

In [2]:
# Read the key from the environment instead of pasting a secret into the notebook.
# When OPENAI_API_KEY is unset this falls back to "" (the previously hard-coded value).
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Model handle returned by keymaker.models.chatgpt(); used below to generate the
# free-text explanation.
chat_model = chatgpt()

In [4]:
# The end-user request that has to be routed to one of the tools listed in the prompt.
user_msg: str = "The weather will be really bad tomorrow what should I wear?"

In [5]:
# Prompt text for the tool-selection demo: a %system% section listing the three tools,
# then a %user% section carrying the user's message plus the "explain, then choose" ask.
# (The %...% tags are presumably keymaker's chat-role markup -- confirm against its docs.)
# NOTE: this is an f-string, so user_msg is substituted when this cell runs; re-run the
# cell after changing user_msg.
prompt_template = f"""
%system%You are an AI assistant that chooses tools for tasks based on a user task and tool descriptions.
Here are the tools you can choose from:
    Tool A: Looks up information on animals
    Tool B: Finds peoples geolocation based on triangulating their phone data
    Tool C: Finds the best outfit to wear today based on the weather
%/system%
%user%{user_msg}
Explain your reasoning in 50 words or less and then choose the appropriate tool.
%/user%
"""

In [6]:
async def print_stream(completion: Completion):
    """Streaming callback: echo each Completion it is handed, in ``repr()`` form."""
    print(f"{completion!r}")

In [7]:
# Wrap the template text in a keymaker Prompt so that .complete() can be called on it.
prompt = Prompt(prompt_template)

In [8]:
prompt = await prompt.complete(model=chat_model, max_tokens=100, stream=print_stream, name='explanation')

Completion(text = 'Based', start = 502, stop = 507)
Completion(text = ' on', start = 507, stop = 510)
Completion(text = ' the', start = 510, stop = 514)
Completion(text = ' user', start = 514, stop = 519)
Completion(text = ''s', start = 519, stop = 521)
Completion(text = ' request', start = 521, stop = 529)
Completion(text = ' to', start = 529, stop = 532)
Completion(text = ' find', start = 532, stop = 537)
Completion(text = ' the', start = 537, stop = 541)
Completion(text = ' best', start = 541, stop = 546)
Completion(text = ' outfit', start = 546, stop = 553)
Completion(text = ' to', start = 553, stop = 556)
Completion(text = ' wear', start = 556, stop = 561)
Completion(text = ' tomorrow', start = 561, stop = 570)
Completion(text = ',', start = 570, stop = 571)
Completion(text = ' the', start = 571, stop = 575)
Completion(text = ' appropriate', start = 575, stop = 587)
Completion(text = ' tool', start = 587, stop = 592)
Completion(text = ' to', start = 592, stop = 595)
Completion(tex

In [9]:
# Display the completion that was stored under name='explanation' by the complete() call.
prompt.completions.explanation

Completion(text = 'Based on the user's request to find the best outfit to wear tomorrow, the appropriate tool to use would be Tool C. This tool can analyze the weather conditions and provide recommendations on what to wear based on the forecasted weather.', start = 502, stop = 738)

In [10]:
# Location of the local model weights. Set the LLAMA_MODEL_PATH environment variable to
# point at your own copy; the fallback is the original author's machine-specific path.
LLAMA_MODEL_PATH = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/Users/nick/Downloads/orca-mini-v2_7b.ggmlv3.q3_K_S.bin",
)
llama_model = LlamaCpp(model_path=LLAMA_MODEL_PATH)

llama.cpp: loading model from /Users/nick/Downloads/orca-mini-v2_7b.ggmlv3.q3_K_S.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 11 (mostly Q3_K - Small)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 4603.09 MB (+ 1026.00 MB per state)
llama_new_context_with_model: kv self size  =  256.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 


In [11]:
# Constraint built from the three tool letters; passed to complete() on the local model
# below (intended to limit the answer to one of these options).
constraint = OptionsConstraint({"A", "B", "C"})

In [12]:
# Follow-up prompt for the local model: the explanation text produced earlier, followed
# by a User/Assistant turn asking which tool that explanation points to.
llama_prompt = Prompt(prompt.completions.explanation) + (
    "\nUser: Based on this explanation which tool should be chosen?\nAssistant:"
)

In [13]:
choice_prompt = await llama_prompt.complete(model=llama_model, constraint=constraint)


llama_print_timings:        load time =  6456.59 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1392.76 tokens per second)
llama_print_timings: prompt eval time =  6456.54 ms /    68 tokens (   94.95 ms per token,    10.53 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  6461.02 ms


In [14]:
class ToolKinds(Enum):
    """The tools the assistant can pick from, keyed by the letter used in the prompt."""

    # Tool A: looks up information on animals
    A = "A"
    # Tool B: finds a person's geolocation from phone data
    B = "B"
    # Tool C: finds the best outfit to wear based on the weather
    C = "C"

In [15]:
async def choose_a_tool(prompt, choices_enum: Type[Enum], model=None) -> ToolKinds:
    """Complete ``prompt`` under an options constraint and return the matching enum member.

    Args:
        prompt: Object exposing an awaitable ``complete(model=..., constraint=...)``.
        choices_enum: Enum *class* (not a member); its member values are the options
            handed to ``OptionsConstraint``.
        model: Model to generate with. When omitted, the notebook-level ``llama_model``
            is used, which matches the previous behaviour.

    Returns:
        The member of ``choices_enum`` looked up from the generated completion.
        (Annotated as ``ToolKinds`` because that is the enum this notebook passes in.)

    Raises:
        ValueError: if the completion does not equal any member value (raised by the
            Enum value lookup).
    """
    if model is None:
        # Resolved at call time so a later re-assignment of llama_model is picked up.
        model = llama_model
    constraint = OptionsConstraint({member.value for member in choices_enum})
    completed = await prompt.complete(model=model, constraint=constraint)
    # NOTE(review): index 0 assumes `prompt` carried no earlier completions, so the
    # first completion on the returned prompt is the one just generated -- confirm.
    return choices_enum(completed.completions[0])

In [16]:
tool_choice: ToolKinds = await choose_a_tool(llama_prompt, ToolKinds)

Llama.generate: prefix-match hit

llama_print_timings:        load time =  6456.59 ms
llama_print_timings:      sample time =     0.68 ms /     1 runs   (    0.68 ms per token,  1472.75 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   147.31 ms /     1 runs   (  147.31 ms per token,     6.79 tokens per second)
llama_print_timings:       total time =   151.46 ms
