In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import openai

from keymaker.models import chatgpt, gpt4, LlamaCpp
from keymaker.constraints import RegexConstraint, OptionsConstraint, StopsConstraint
from keymaker import Prompt, Completion, CompletionConfig

import json

# Read the OpenAI API key from the JSON config file instead of embedding it in
# the notebook. JSON text is UTF-8, so the encoding is pinned explicitly rather
# than relying on the platform default, and json.load parses straight from the
# file handle.
with open("./examples/config.json", encoding="utf-8") as f:
    openai.api_key = json.load(f)["OPENAI_API_KEY"]

<IPython.core.display.Javascript object>

In [3]:
from typing import Optional


async def print_stream(completion: Optional[Completion]):
    """Stream callback that echoes the ``repr`` of each completion it receives.

    Falsy values (such as ``None``) are skipped and produce no output.
    """
    if not completion:
        return
    print(repr(completion))


async def yo_stream(completion: Optional[Completion]):
    """Stream callback that prints each completion prefixed with ``"YO "``.

    Falsy values (such as ``None``) are skipped and produce no output.
    """
    if not completion:
        return
    print("YO " + completion)

<IPython.core.display.Javascript object>

In [4]:
import os

# Model handed to Prompt(...) as the default for completions.
chat_model = chatgpt()

# Local llama.cpp weights. The location can be overridden with the
# LLAMA_MODEL_PATH environment variable so the notebook is runnable on machines
# other than the author's; when the variable is unset the original path is used.
llama_model = LlamaCpp(
    model_path=os.environ.get(
        "LLAMA_MODEL_PATH",
        "/Users/nick/Downloads/llama-2-7b-chat.ggmlv3.q3_K_S.bin",
    )
)

llama.cpp: loading model from /Users/nick/Downloads/llama-2-7b-chat.ggmlv3.q3_K_S.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 11 (mostly Q3_K - Small)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 4603.09 MB (+ 1026.00 MB per state)
llama_new_context_with_model: kv self size  =  256.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 


<IPython.core.display.Javascript object>

In [5]:
from datetime import datetime

# Opening user turn; passed to prompt.format() as the `user_msg` keyword.
user_message = "Hi, my name is Nick."

<IPython.core.display.Javascript object>

In [6]:
# Most recent answer recorded by store_my_math (None until it first runs).
my_math_answer = None


def store_my_math(answer):
    """Record ``answer`` as an int in ``my_math_answer`` and grade it.

    Returns the integer 15 when the recorded value equals 15, otherwise the
    string "Duh me no know.". ``int(answer)`` raises if ``answer`` cannot be
    converted.
    """
    global my_math_answer
    my_math_answer = int(answer)
    return 15 if my_math_answer == 15 else "Duh me no know."

<IPython.core.display.Javascript object>

In [7]:
# default model, stream and max_tokens for all completions
# The template mixes named slots ({time}, {user_msg}, {punctuation}, {poem},
# {math}, {fin}) with two anonymous `{}` slots; values for all of them are
# supplied by the prompt.format(...) call that follows.
prompt = Prompt(
    """Time: {time}
User: {user_msg}
Assistant: Hello, {}{punctuation}
User: Can you write me a poem about a superhero named pandaman being a friend to {}?
Assistant:{poem}
User: What is 10+5?
Assistant: The answer is 10+5={math}

The final answer is {fin}!
""",
    chat_model,  # default model; the poem/math/fin slots pass llama_model instead
    stream=print_stream,  # default stream callback: prints repr() of each chunk
    max_tokens=25,  # default budget; the poem slot passes max_tokens=250
)

# Fill every slot of the template. Keyword arguments target the named slots;
# the two positional arguments presumably fill the anonymous `{}` slots in
# order of appearance (TODO confirm against the keymaker docs).
filled_in = await prompt.format(
    # custom constraint
    # Limits this completion to one of the two names and streams it through
    # yo_stream rather than the prompt-level print_stream.
    CompletionConfig(constraint=OptionsConstraint({"Sam", "Nick"}), stream=yo_stream),
    # Callable argument: returns the first recorded completion (index 0) so the
    # poem request repeats the name chosen above. `p` is presumably the prompt
    # as filled in so far (TODO confirm).
    lambda p: p.completions[0],
    # can do formatting just like a regular python string
    punctuation="!",
    user_msg=user_message,
    time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    # Poem uses the local llama model with a larger token budget. The
    # StopsConstraint presumably ends generation at the next "User"/"Assistant"
    # marker, with include=False leaving the marker out (TODO confirm).
    poem=CompletionConfig(
        llama_model,
        max_tokens=250,
        constraint=StopsConstraint("User|Assistant", include=False),
    ),
    # Digits-only answer; store_my_math records it as an int in my_math_answer
    # and returns 15 or a fallback string.
    # NOTE(review): exact effect of terminate_on_match=False is not visible
    # here; confirm in the keymaker docs.
    math=CompletionConfig(
        llama_model,
        constraint=RegexConstraint("[0-9]+", terminate_on_match=False),
        map_fn=store_my_math,
    ),
    # Wrapped in a lambda so the config is built only once `p` is available:
    # the regex embeds whatever p.completions.math holds at that point, with
    # the literal 16 as the alternative. store_my_math runs again here and
    # overwrites my_math_answer.
    fin=lambda p: CompletionConfig(
        llama_model,
        constraint=RegexConstraint(rf"{p.completions.math}|16"),
        map_fn=store_my_math,
    ),
)

YO Nick



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1459.85 tokens per second)
llama_print_timings: prompt eval time =  9736.51 ms /    69 tokens (  141.11 ms per token,     7.09 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  9822.76 ms
Llama.generate: prefix-match hit


Completion(text=' Of', start=6, stop=9, name=poem, chunk=True, score=0.9953074114416708)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1408.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =  1834.80 ms /     1 runs   ( 1834.80 ms per token,     0.55 tokens per second)
llama_print_timings:       total time =  1837.49 ms
Llama.generate: prefix-match hit


Completion(text=' course', start=17, stop=24, name=poem, chunk=True, score=0.9998058923972919)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1386.96 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   341.54 ms /     1 runs   (  341.54 ms per token,     2.93 tokens per second)
llama_print_timings:       total time =   343.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1445.09 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.29 ms /     1 runs   (   97.29 ms per token,    10.28 tokens per second)
llama_print_timings:       total time =    99.62 ms
Llama.generate: prefix-match hit


Completion(text=',', start=12, stop=13, name=poem, chunk=True, score=0.7294512122618273)
Completion(text=' I', start=15, stop=17, name=poem, chunk=True, score=0.6383506266375113)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1379.31 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    81.78 ms /     1 runs   (   81.78 ms per token,    12.23 tokens per second)
llama_print_timings:       total time =    84.00 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1453.49 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   100.09 ms /     1 runs   (  100.09 ms per token,     9.99 tokens per second)
llama_print_timings:       total time =   102.42 ms
Llama.generate: prefix-match hit


Completion(text=''', start=15, stop=16, name=poem, chunk=True, score=0.9955273932566794)
Completion(text='d', start=16, stop=17, name=poem, chunk=True, score=0.9873862141470847)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   107.18 ms /     1 runs   (  107.18 ms per token,     9.33 tokens per second)
llama_print_timings:       total time =   109.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1451.38 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   103.07 ms /     1 runs   (  103.07 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =   105.33 ms
Llama.generate: prefix-match hit


Completion(text=' be', start=21, stop=24, name=poem, chunk=True, score=0.999932873588113)
Completion(text=' happy', start=30, stop=36, name=poem, chunk=True, score=0.997731720151794)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    82.33 ms /     1 runs   (   82.33 ms per token,    12.15 tokens per second)
llama_print_timings:       total time =    84.54 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1447.18 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   126.23 ms /     1 runs   (  126.23 ms per token,     7.92 tokens per second)
llama_print_timings:       total time =   128.44 ms
Llama.generate: prefix-match hit


Completion(text=' to', start=30, stop=33, name=poem, chunk=True, score=0.99999689194915)
Completion(text=' help', start=37, stop=42, name=poem, chunk=True, score=0.8599402868845862)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   103.96 ms /     1 runs   (  103.96 ms per token,     9.62 tokens per second)
llama_print_timings:       total time =   106.21 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1453.49 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.56 ms /     1 runs   (   96.56 ms per token,    10.36 tokens per second)
llama_print_timings:       total time =    98.83 ms
Llama.generate: prefix-match hit


Completion(text='!', start=34, stop=35, name=poem, chunk=True, score=0.8021636884187764)
Completion(text=' Here', start=43, stop=48, name=poem, chunk=True, score=0.9873197262986634)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1449.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   148.19 ms /     1 runs   (  148.19 ms per token,     6.75 tokens per second)
llama_print_timings:       total time =   150.40 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.14 ms /     1 runs   (   99.14 ms per token,    10.09 tokens per second)
llama_print_timings:       total time =   101.52 ms
Llama.generate: prefix-match hit


Completion(text=''', start=40, stop=41, name=poem, chunk=True, score=0.7134032705807586)
Completion(text='s', start=41, stop=42, name=poem, chunk=True, score=0.9999979767711287)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1430.62 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    85.77 ms /     1 runs   (   85.77 ms per token,    11.66 tokens per second)
llama_print_timings:       total time =    88.02 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1436.78 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   125.60 ms /     1 runs   (  125.60 ms per token,     7.96 tokens per second)
llama_print_timings:       total time =   127.83 ms
Llama.generate: prefix-match hit


Completion(text=' a', start=44, stop=46, name=poem, chunk=True, score=0.999989591228975)
Completion(text=' poem', start=52, stop=57, name=poem, chunk=True, score=0.9377586647527572)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1447.18 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.12 ms /     1 runs   (   99.12 ms per token,    10.09 tokens per second)
llama_print_timings:       total time =   101.43 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1410.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    83.96 ms /     1 runs   (   83.96 ms per token,    11.91 tokens per second)
llama_print_timings:       total time =    86.21 ms
Llama.generate: prefix-match hit


Completion(text=' for', start=55, stop=59, name=poem, chunk=True, score=0.5325966819061909)
Completion(text=' you', start=59, stop=63, name=poem, chunk=True, score=0.9922621844575532)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1443.00 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    84.94 ms /     1 runs   (   84.94 ms per token,    11.77 tokens per second)
llama_print_timings:       total time =    87.17 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1455.60 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   131.08 ms /     1 runs   (  131.08 ms per token,     7.63 tokens per second)
llama_print_timings:       total time =   133.31 ms
Llama.generate: prefix-match hit


Completion(text=':', start=57, stop=58, name=poem, chunk=True, score=0.9999869317573624)
Completion(text='
', start=58, stop=59, name=poem, chunk=True, score=0.9999957937465832)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   104.28 ms /     1 runs   (  104.28 ms per token,     9.59 tokens per second)
llama_print_timings:       total time =   106.49 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.46 ms /     1 runs   (   99.46 ms per token,    10.05 tokens per second)
llama_print_timings:       total time =   101.74 ms
Llama.generate: prefix-match hit


Completion(text='P', start=59, stop=60, name=poem, chunk=True, score=0.7231784504321472)
Completion(text='and', start=64, stop=67, name=poem, chunk=True, score=0.9890667676097112)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.41 ms /     1 runs   (   95.41 ms per token,    10.48 tokens per second)
llama_print_timings:       total time =    97.64 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   113.01 ms /     1 runs   (  113.01 ms per token,     8.85 tokens per second)
llama_print_timings:       total time =   115.24 ms
Llama.generate: prefix-match hit


Completion(text='aman', start=69, stop=73, name=poem, chunk=True, score=0.9998332442952118)
Completion(text=',', start=67, stop=68, name=poem, chunk=True, score=0.8403620615830886)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1449.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   101.30 ms /     1 runs   (  101.30 ms per token,     9.87 tokens per second)
llama_print_timings:       total time =   103.55 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   116.64 ms /     1 runs   (  116.64 ms per token,     8.57 tokens per second)
llama_print_timings:       total time =   119.10 ms
Llama.generate: prefix-match hit


Completion(text=' the', start=74, stop=78, name=poem, chunk=True, score=0.8722101415316552)
Completion(text=' hero', start=80, stop=85, name=poem, chunk=True, score=0.9693502788076992)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1371.74 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   122.78 ms /     1 runs   (  122.78 ms per token,     8.14 tokens per second)
llama_print_timings:       total time =   125.14 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1362.40 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   112.91 ms /     1 runs   (  112.91 ms per token,     8.86 tokens per second)
llama_print_timings:       total time =   115.19 ms
Llama.generate: prefix-match hit


Completion(text=' of', start=81, stop=84, name=poem, chunk=True, score=0.9983598160953135)
Completion(text=' the', start=86, stop=90, name=poem, chunk=True, score=0.9707523035703777)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1443.00 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   123.44 ms /     1 runs   (  123.44 ms per token,     8.10 tokens per second)
llama_print_timings:       total time =   125.64 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1360.54 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   143.91 ms /     1 runs   (  143.91 ms per token,     6.95 tokens per second)
llama_print_timings:       total time =   146.24 ms
Llama.generate: prefix-match hit


Completion(text=' land', start=92, stop=97, name=poem, chunk=True, score=0.9304026061013966)
Completion(text=',', start=89, stop=90, name=poem, chunk=True, score=0.7330135097408242)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1410.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   104.81 ms /     1 runs   (  104.81 ms per token,     9.54 tokens per second)
llama_print_timings:       total time =   107.09 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   116.23 ms /     1 runs   (  116.23 ms per token,     8.60 tokens per second)
llama_print_timings:       total time =   118.61 ms
Llama.generate: prefix-match hit


Completion(text='
', start=90, stop=91, name=poem, chunk=True, score=0.9999736808023999)
Completion(text='A', start=91, stop=92, name=poem, chunk=True, score=0.5694346562181818)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1445.09 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   111.71 ms /     1 runs   (  111.71 ms per token,     8.95 tokens per second)
llama_print_timings:       total time =   113.92 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1438.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   110.58 ms /     1 runs   (  110.58 ms per token,     9.04 tokens per second)
llama_print_timings:       total time =   112.78 ms
Llama.generate: prefix-match hit


Completion(text=' friend', start=104, stop=111, name=poem, chunk=True, score=0.9774281218074055)
Completion(text=' to', start=103, stop=106, name=poem, chunk=True, score=0.9997622187057537)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   134.02 ms /     1 runs   (  134.02 ms per token,     7.46 tokens per second)
llama_print_timings:       total time =   136.26 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1371.74 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    90.60 ms /     1 runs   (   90.60 ms per token,    11.04 tokens per second)
llama_print_timings:       total time =    92.90 ms
Llama.generate: prefix-match hit


Completion(text=' Nick', start=110, stop=115, name=poem, chunk=True, score=0.9984414160642047)
Completion(text=',', start=107, stop=108, name=poem, chunk=True, score=0.9996713993695958)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.85 ms /     1 runs   (    0.85 ms per token,  1179.25 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   215.21 ms /     1 runs   (  215.21 ms per token,     4.65 tokens per second)
llama_print_timings:       total time =   219.32 ms
Llama.generate: prefix-match hit


Completion(text=' his', start=114, stop=118, name=poem, chunk=True, score=0.36084031623456875)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   527.54 ms /     1 runs   (  527.54 ms per token,     1.90 tokens per second)
llama_print_timings:       total time =   529.86 ms
Llama.generate: prefix-match hit


Completion(text=' heart', start=122, stop=128, name=poem, chunk=True, score=0.533928237550512)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   269.24 ms /     1 runs   (  269.24 ms per token,     3.71 tokens per second)
llama_print_timings:       total time =   271.66 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   154.00 ms /     1 runs   (  154.00 ms per token,     6.49 tokens per second)
llama_print_timings:       total time =   156.29 ms
Llama.generate: prefix-match hit


Completion(text=' so', start=122, stop=125, name=poem, chunk=True, score=0.7202129486609841)
Completion(text=' grand', start=131, stop=137, name=poem, chunk=True, score=0.9888280374280882)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   103.72 ms /     1 runs   (  103.72 ms per token,     9.64 tokens per second)
llama_print_timings:       total time =   106.03 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    88.16 ms /     1 runs   (   88.16 ms per token,    11.34 tokens per second)
llama_print_timings:       total time =    90.41 ms
Llama.generate: prefix-match hit


Completion(text='.', start=127, stop=128, name=poem, chunk=True, score=0.9997833774140517)
Completion(text='
', start=128, stop=129, name=poem, chunk=True, score=0.9999995466950242)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1436.78 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    89.36 ms /     1 runs   (   89.36 ms per token,    11.19 tokens per second)
llama_print_timings:       total time =    91.61 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1412.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.31 ms /     1 runs   (   95.31 ms per token,    10.49 tokens per second)
llama_print_timings:       total time =    97.62 ms
Llama.generate: prefix-match hit


Completion(text='With', start=135, stop=139, name=poem, chunk=True, score=0.9194338363499602)
Completion(text=' powers', start=145, stop=152, name=poem, chunk=True, score=0.9718612356284554)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1445.09 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   111.68 ms /     1 runs   (  111.68 ms per token,     8.95 tokens per second)
llama_print_timings:       total time =   113.88 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1385.04 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.09 ms /     1 runs   (   86.09 ms per token,    11.62 tokens per second)
llama_print_timings:       total time =    88.40 ms
Llama.generate: prefix-match hit


Completion(text=' beyond', start=152, stop=159, name=poem, chunk=True, score=0.4592738014280859)
Completion(text=' compare', start=161, stop=169, name=poem, chunk=True, score=0.3742353503035575)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.50 ms /     1 runs   (   97.50 ms per token,    10.26 tokens per second)
llama_print_timings:       total time =    99.77 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.68 ms /     1 runs   (    0.68 ms per token,  1461.99 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   181.00 ms /     1 runs   (  181.00 ms per token,     5.52 tokens per second)
llama_print_timings:       total time =   183.28 ms


Completion(text=',', start=155, stop=156, name=poem, chunk=True, score=0.990526519129926)
Completion(text='
', start=156, stop=157, name=poem, chunk=True, score=0.9654135314530301)


Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1449.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   144.50 ms /     1 runs   (  144.50 ms per token,     6.92 tokens per second)
llama_print_timings:       total time =   147.16 ms
Llama.generate: prefix-match hit


Completion(text='He', start=159, stop=161, name=poem, chunk=True, score=0.7950306645522822)
Completion(text=' f', start=161, stop=163, name=poem, chunk=True, score=0.48303827309075315)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1438.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.68 ms /     1 runs   (   98.68 ms per token,    10.13 tokens per second)
llama_print_timings:       total time =   101.07 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.37 ms /     1 runs   (   98.37 ms per token,    10.17 tokens per second)
llama_print_timings:       total time =   100.74 ms
Llama.generate: prefix-match hit


Completion(text='ights', start=169, stop=174, name=poem, chunk=True, score=0.9733054608120723)
Completion(text=' for', start=172, stop=176, name=poem, chunk=True, score=0.9312538106485317)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1402.52 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   106.70 ms /     1 runs   (  106.70 ms per token,     9.37 tokens per second)
llama_print_timings:       total time =   109.10 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   130.81 ms /     1 runs   (  130.81 ms per token,     7.64 tokens per second)
llama_print_timings:       total time =   133.03 ms
Llama.generate: prefix-match hit


Completion(text=' justice', start=184, stop=192, name=poem, chunk=True, score=0.7505151447158052)
Completion(text=',', start=178, stop=179, name=poem, chunk=True, score=0.9075623906438347)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1400.56 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   150.37 ms /     1 runs   (  150.37 ms per token,     6.65 tokens per second)
llama_print_timings:       total time =   152.69 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1412.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   123.29 ms /     1 runs   (  123.29 ms per token,     8.11 tokens per second)
llama_print_timings:       total time =   125.54 ms
Llama.generate: prefix-match hit


Completion(text=' without', start=193, stop=201, name=poem, chunk=True, score=0.49417541119855585)
Completion(text=' fear', start=195, stop=200, name=poem, chunk=True, score=0.37770891247738403)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1398.60 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   146.75 ms /     1 runs   (  146.75 ms per token,     6.81 tokens per second)
llama_print_timings:       total time =   149.09 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1383.13 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   115.05 ms /     1 runs   (  115.05 ms per token,     8.69 tokens per second)
llama_print_timings:       total time =   117.43 ms
Llama.generate: prefix-match hit


Completion(text='.', start=192, stop=193, name=poem, chunk=True, score=0.9940108928775653)
Completion(text='
', start=193, stop=194, name=poem, chunk=True, score=0.9999980606614188)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   147.65 ms /     1 runs   (  147.65 ms per token,     6.77 tokens per second)
llama_print_timings:       total time =   150.00 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.76 ms /     1 runs   (    0.76 ms per token,  1319.26 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   159.74 ms /     1 runs   (  159.74 ms per token,     6.26 tokens per second)
llama_print_timings:       total time =   162.18 ms
Llama.generate: prefix-match hit


Completion(text='
', start=194, stop=195, name=poem, chunk=True, score=0.9726782545904968)
Completion(text='N', start=195, stop=196, name=poem, chunk=True, score=0.6848277890795427)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1430.62 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   117.26 ms /     1 runs   (  117.26 ms per token,     8.53 tokens per second)
llama_print_timings:       total time =   119.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   103.46 ms /     1 runs   (  103.46 ms per token,     9.67 tokens per second)
llama_print_timings:       total time =   105.74 ms
Llama.generate: prefix-match hit


Completion(text='ick', start=200, stop=203, name=poem, chunk=True, score=0.9935177520218769)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   260.17 ms /     1 runs   (  260.17 ms per token,     3.84 tokens per second)
llama_print_timings:       total time =   262.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1402.52 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   102.14 ms /     1 runs   (  102.14 ms per token,     9.79 tokens per second)
llama_print_timings:       total time =   104.42 ms
Llama.generate: prefix-match hit


Completion(text=' and', start=205, stop=209, name=poem, chunk=True, score=0.1887293583931763)
Completion(text=' P', start=205, stop=207, name=poem, chunk=True, score=0.9999620327612888)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1455.60 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   107.02 ms /     1 runs   (  107.02 ms per token,     9.34 tokens per second)
llama_print_timings:       total time =   109.31 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    87.64 ms /     1 runs   (   87.64 ms per token,    11.41 tokens per second)
llama_print_timings:       total time =    89.96 ms
Llama.generate: prefix-match hit


Completion(text='and', start=209, stop=212, name=poem, chunk=True, score=0.9999300628768667)
Completion(text='aman', start=214, stop=218, name=poem, chunk=True, score=0.9999971374814681)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    83.69 ms /     1 runs   (   83.69 ms per token,    11.95 tokens per second)
llama_print_timings:       total time =    85.96 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1436.78 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   111.61 ms /     1 runs   (  111.61 ms per token,     8.96 tokens per second)
llama_print_timings:       total time =   113.87 ms
Llama.generate: prefix-match hit


Completion(text=',', start=212, stop=213, name=poem, chunk=True, score=0.9971252762868673)
Completion(text=' a', start=215, stop=217, name=poem, chunk=True, score=0.9492011736254254)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1360.54 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   132.97 ms /     1 runs   (  132.97 ms per token,     7.52 tokens per second)
llama_print_timings:       total time =   135.29 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    77.20 ms /     1 runs   (   77.20 ms per token,    12.95 tokens per second)
llama_print_timings:       total time =    79.45 ms
Llama.generate: prefix-match hit


Completion(text=' bond', start=223, stop=228, name=poem, chunk=True, score=0.3903475886927555)
Completion(text=' so', start=224, stop=227, name=poem, chunk=True, score=0.9931865239937988)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1377.41 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   103.22 ms /     1 runs   (  103.22 ms per token,     9.69 tokens per second)
llama_print_timings:       total time =   105.99 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.38 ms /     1 runs   (   96.38 ms per token,    10.38 tokens per second)
llama_print_timings:       total time =    98.74 ms
Llama.generate: prefix-match hit


Completion(text=' strong', start=235, stop=242, name=poem, chunk=True, score=0.9230281490355644)
Completion(text=',', start=230, stop=231, name=poem, chunk=True, score=0.9998728474677913)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1420.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    87.56 ms /     1 runs   (   87.56 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =    89.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.74 ms /     1 runs   (    0.74 ms per token,  1342.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    83.45 ms /     1 runs   (   83.45 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =    85.88 ms
Llama.generate: prefix-match hit


Completion(text='
', start=231, stop=232, name=poem, chunk=True, score=0.9999996215985039)
Completion(text='T', start=232, stop=233, name=poem, chunk=True, score=0.988959545919373)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    90.30 ms /     1 runs   (   90.30 ms per token,    11.07 tokens per second)
llama_print_timings:       total time =    92.58 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    91.43 ms /     1 runs   (   91.43 ms per token,    10.94 tokens per second)
llama_print_timings:       total time =    93.70 ms
Llama.generate: prefix-match hit


Completion(text='ogether', start=245, stop=252, name=poem, chunk=True, score=0.9632645028420163)
Completion(text=' they', start=248, stop=253, name=poem, chunk=True, score=0.99903030746121)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1396.65 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    91.03 ms /     1 runs   (   91.03 ms per token,    10.99 tokens per second)
llama_print_timings:       total time =    93.32 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.75 ms /     1 runs   (    0.75 ms per token,  1328.02 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.74 ms /     1 runs   (   95.74 ms per token,    10.44 tokens per second)
llama_print_timings:       total time =    98.10 ms
Llama.generate: prefix-match hit


Completion(text=' fight', start=255, stop=261, name=poem, chunk=True, score=0.39027708805474437)
Completion(text=',', start=251, stop=252, name=poem, chunk=True, score=0.6476903948952425)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1400.56 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    85.20 ms /     1 runs   (   85.20 ms per token,    11.74 tokens per second)
llama_print_timings:       total time =    87.77 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   102.52 ms /     1 runs   (  102.52 ms per token,     9.75 tokens per second)
llama_print_timings:       total time =   104.79 ms
Llama.generate: prefix-match hit


Completion(text=' where', start=262, stop=268, name=poem, chunk=True, score=0.6443600880868257)
Completion(text=' right', start=268, stop=274, name=poem, chunk=True, score=0.42836606204781547)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    81.26 ms /     1 runs   (   81.26 ms per token,    12.31 tokens per second)
llama_print_timings:       total time =    83.52 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1408.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    82.04 ms /     1 runs   (   82.04 ms per token,    12.19 tokens per second)
llama_print_timings:       total time =    84.36 ms
Llama.generate: prefix-match hit


Completion(text=' is', start=268, stop=271, name=poem, chunk=True, score=0.4939221444596446)
Completion(text=' wrong', start=277, stop=283, name=poem, chunk=True, score=0.9707139595363026)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1396.65 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.14 ms /     1 runs   (   99.14 ms per token,    10.09 tokens per second)
llama_print_timings:       total time =   101.73 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   108.16 ms /     1 runs   (  108.16 ms per token,     9.25 tokens per second)
llama_print_timings:       total time =   110.48 ms
Llama.generate: prefix-match hit


Completion(text='.', start=273, stop=274, name=poem, chunk=True, score=0.9997693802199288)
Completion(text='
', start=274, stop=275, name=poem, chunk=True, score=0.9999991996662015)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1362.40 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   110.93 ms /     1 runs   (  110.93 ms per token,     9.01 tokens per second)
llama_print_timings:       total time =   113.34 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    93.66 ms /     1 runs   (   93.66 ms per token,    10.68 tokens per second)
llama_print_timings:       total time =    95.92 ms
Llama.generate: prefix-match hit


Completion(text='The', start=279, stop=282, name=poem, chunk=True, score=0.25272561547191336)
Completion(text='ir', start=280, stop=282, name=poem, chunk=True, score=0.9973428435836373)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1394.70 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.94 ms /     1 runs   (   86.94 ms per token,    11.50 tokens per second)
llama_print_timings:       total time =    89.44 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1449.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.96 ms /     1 runs   (   97.96 ms per token,    10.21 tokens per second)
llama_print_timings:       total time =   100.28 ms
Llama.generate: prefix-match hit


Completion(text=' friendship', start=300, stop=311, name=poem, chunk=True, score=0.9401542643971671)
Completion(text=' grows', start=301, stop=307, name=poem, chunk=True, score=0.6571695167729293)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.05 ms /     1 runs   (   99.05 ms per token,    10.10 tokens per second)
llama_print_timings:       total time =   101.40 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.02 ms /     1 runs   (   95.02 ms per token,    10.52 tokens per second)
llama_print_timings:       total time =    97.31 ms
Llama.generate: prefix-match hit


Completion(text=' with', start=305, stop=310, name=poem, chunk=True, score=0.5191050562848962)
Completion(text=' each', start=310, stop=315, name=poem, chunk=True, score=0.9365005133429113)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1412.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.95 ms /     1 runs   (   96.95 ms per token,    10.31 tokens per second)
llama_print_timings:       total time =    99.22 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   115.01 ms /     1 runs   (  115.01 ms per token,     8.69 tokens per second)
llama_print_timings:       total time =   117.30 ms
Llama.generate: prefix-match hit


Completion(text=' passing', start=321, stop=329, name=poem, chunk=True, score=0.8168783320627915)
Completion(text=' day', start=321, stop=325, name=poem, chunk=True, score=0.994463588421859)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    78.38 ms /     1 runs   (   78.38 ms per token,    12.76 tokens per second)
llama_print_timings:       total time =    80.65 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1430.62 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    91.94 ms /     1 runs   (   91.94 ms per token,    10.88 tokens per second)
llama_print_timings:       total time =    94.23 ms
Llama.generate: prefix-match hit


Completion(text=',', start=319, stop=320, name=poem, chunk=True, score=0.9999953380262725)
Completion(text='
', start=320, stop=321, name=poem, chunk=True, score=0.9999994152042928)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    83.15 ms /     1 runs   (   83.15 ms per token,    12.03 tokens per second)
llama_print_timings:       total time =    85.42 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.72 ms /     1 runs   (    0.72 ms per token,  1392.76 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    92.87 ms /     1 runs   (   92.87 ms per token,    10.77 tokens per second)
llama_print_timings:       total time =    95.17 ms
Llama.generate: prefix-match hit


Completion(text='As', start=323, stop=325, name=poem, chunk=True, score=0.34924530520131913)
Completion(text=' they', start=331, stop=336, name=poem, chunk=True, score=0.982206385909592)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.71 ms /     1 runs   (   98.71 ms per token,    10.13 tokens per second)
llama_print_timings:       total time =   101.00 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    91.96 ms /     1 runs   (   91.96 ms per token,    10.87 tokens per second)
llama_print_timings:       total time =    94.26 ms
Llama.generate: prefix-match hit


Completion(text=' stand', start=338, stop=344, name=poem, chunk=True, score=0.74493057299107)
Completion(text=' together', start=350, stop=359, name=poem, chunk=True, score=0.3600973347316881)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.66 ms /     1 runs   (   97.66 ms per token,    10.24 tokens per second)
llama_print_timings:       total time =    99.98 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1408.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.66 ms /     1 runs   (   96.66 ms per token,    10.35 tokens per second)
llama_print_timings:       total time =    98.96 ms
Llama.generate: prefix-match hit


Completion(text=',', start=343, stop=344, name=poem, chunk=True, score=0.9759607556289993)
Completion(text=' come', start=352, stop=357, name=poem, chunk=True, score=0.42621817882069823)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.75 ms /     1 runs   (   96.75 ms per token,    10.34 tokens per second)
llama_print_timings:       total time =    99.08 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    92.66 ms /     1 runs   (   92.66 ms per token,    10.79 tokens per second)
llama_print_timings:       total time =    94.96 ms
Llama.generate: prefix-match hit


Completion(text=' what', start=357, stop=362, name=poem, chunk=True, score=0.9996866796512898)
Completion(text=' may', start=360, stop=364, name=poem, chunk=True, score=0.9989492826250637)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1412.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    81.78 ms /     1 runs   (   81.78 ms per token,    12.23 tokens per second)
llama_print_timings:       total time =    84.08 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1412.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   100.87 ms /     1 runs   (  100.87 ms per token,     9.91 tokens per second)
llama_print_timings:       total time =   103.14 ms
Llama.generate: prefix-match hit


Completion(text='.', start=358, stop=359, name=poem, chunk=True, score=0.9999596548431923)
Completion(text='
', start=359, stop=360, name=poem, chunk=True, score=0.9999954167991999)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    92.43 ms /     1 runs   (   92.43 ms per token,    10.82 tokens per second)
llama_print_timings:       total time =    94.75 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    92.80 ms /     1 runs   (   92.80 ms per token,    10.78 tokens per second)
llama_print_timings:       total time =    95.14 ms
Llama.generate: prefix-match hit


Completion(text='
', start=360, stop=361, name=poem, chunk=True, score=0.9973752897940058)
Completion(text='P', start=361, stop=362, name=poem, chunk=True, score=0.9099208926844878)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1366.12 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.43 ms /     1 runs   (   86.43 ms per token,    11.57 tokens per second)
llama_print_timings:       total time =    89.00 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    88.03 ms /     1 runs   (   88.03 ms per token,    11.36 tokens per second)
llama_print_timings:       total time =    90.34 ms
Llama.generate: prefix-match hit


Completion(text='and', start=366, stop=369, name=poem, chunk=True, score=0.9998559136380993)
Completion(text='aman', start=371, stop=375, name=poem, chunk=True, score=0.9999958207753742)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   121.78 ms /     1 runs   (  121.78 ms per token,     8.21 tokens per second)
llama_print_timings:       total time =   124.06 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1420.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.22 ms /     1 runs   (   95.22 ms per token,    10.50 tokens per second)
llama_print_timings:       total time =    97.49 ms
Llama.generate: prefix-match hit

llama_pri

Completion(text=',', start=369, stop=370, name=poem, chunk=True, score=0.9213189753898781)
Completion(text=' the', start=376, stop=380, name=poem, chunk=True, score=0.5881420423424695)
Completion(text=' hero', start=382, stop=387, name=poem, chunk=True, score=0.5912215902556943)
Completion(text=' of', start=383, stop=386, name=poem, chunk=True, score=0.9940284482782773)


Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    96.13 ms /     1 runs   (   96.13 ms per token,    10.40 tokens per second)
llama_print_timings:       total time =    98.46 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   102.65 ms /     1 runs   (  102.65 ms per token,     9.74 tokens per second)
llama_print_timings:       total time =   104.94 ms
Llama.gene

Completion(text=' the', start=388, stop=392, name=poem, chunk=True, score=0.9558044346789292)
Completion(text=' night', start=396, stop=402, name=poem, chunk=True, score=0.2344810962463367)


llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1408.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    85.84 ms /     1 runs   (   85.84 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =    88.12 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    95.37 ms /     1 runs   (   95.37 ms per token,    10.49 tokens per second)
llama_print_timings:       total time =    97.67 ms
Llama.generate: prefix-match hit


Completion(text=',', start=392, stop=393, name=poem, chunk=True, score=0.9999920245851165)
Completion(text='
', start=393, stop=394, name=poem, chunk=True, score=0.9999998885063862)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.27 ms /     1 runs   (   86.27 ms per token,    11.59 tokens per second)
llama_print_timings:       total time =    88.64 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1420.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.75 ms /     1 runs   (   97.75 ms per token,    10.23 tokens per second)
llama_print_timings:       total time =   100.13 ms
Llama.generate: prefix-match hit


Completion(text='A', start=394, stop=395, name=poem, chunk=True, score=0.28937507999089146)
Completion(text=' friend', start=407, stop=414, name=poem, chunk=True, score=0.5721373917690858)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1408.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    82.86 ms /     1 runs   (   82.86 ms per token,    12.07 tokens per second)
llama_print_timings:       total time =    85.41 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1410.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   108.57 ms /     1 runs   (  108.57 ms per token,     9.21 tokens per second)
llama_print_timings:       total time =   110.89 ms
Llama.generate: prefix-match hit


Completion(text=' to', start=406, stop=409, name=poem, chunk=True, score=0.9957815092594308)
Completion(text=' Nick', start=413, stop=418, name=poem, chunk=True, score=0.9999893286464556)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    88.88 ms /     1 runs   (   88.88 ms per token,    11.25 tokens per second)
llama_print_timings:       total time =    91.15 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    88.76 ms /     1 runs   (   88.76 ms per token,    11.27 tokens per second)
llama_print_timings:       total time =    91.15 ms
Llama.generate: prefix-match hit


Completion(text=',', start=410, stop=411, name=poem, chunk=True, score=0.9999469952541795)
Completion(text=' sh', start=415, stop=418, name=poem, chunk=True, score=0.30339802416775835)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1360.54 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   160.30 ms /     1 runs   (  160.30 ms per token,     6.24 tokens per second)
llama_print_timings:       total time =   162.62 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    86.50 ms /     1 runs   (   86.50 ms per token,    11.56 tokens per second)
llama_print_timings:       total time =    88.90 ms
Llama.generate: prefix-match hit


Completion(text='ining', start=422, stop=427, name=poem, chunk=True, score=0.9987860215484946)
Completion(text=' so', start=423, stop=426, name=poem, chunk=True, score=0.6472715315761965)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    84.05 ms /     1 runs   (   84.05 ms per token,    11.90 tokens per second)
llama_print_timings:       total time =    86.33 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1438.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.73 ms /     1 runs   (   99.73 ms per token,    10.03 tokens per second)
llama_print_timings:       total time =   102.01 ms
Llama.generate: prefix-match hit


Completion(text=' bright', start=434, stop=441, name=poem, chunk=True, score=0.9999595189827397)
Completion(text='.', start=429, stop=430, name=poem, chunk=True, score=0.9999939059067385)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    94.49 ms /     1 runs   (   94.49 ms per token,    10.58 tokens per second)
llama_print_timings:       total time =    96.81 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1453.49 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    89.64 ms /     1 runs   (   89.64 ms per token,    11.16 tokens per second)
llama_print_timings:       total time =    91.95 ms
Llama.generate: prefix-match hit

llama_pri

Completion(text='
', start=430, stop=431, name=poem, chunk=True, score=0.9999938689142758)
Completion(text='With', start=437, stop=441, name=poem, chunk=True, score=0.7844389923914987)
Completion(text=' powers', start=447, stop=454, name=poem, chunk=True, score=0.22032203402872308)
Completion(text=' beyond', start=454, stop=461, name=poem, chunk=True, score=0.7350682363965977)


Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   119.67 ms /     1 runs   (  119.67 ms per token,     8.36 tokens per second)
llama_print_timings:       total time =   122.03 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1438.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.11 ms /     1 runs   (   98.11 ms per token,    10.19 tokens per second)
llama_print_timings:       total time =   100.48 ms
Llama.gene

Completion(text=' compare', start=463, stop=471, name=poem, chunk=True, score=0.8117992022627952)
Completion(text=',', start=457, stop=458, name=poem, chunk=True, score=0.9999613006636767)
Completion(text='
', start=458, stop=459, name=poem, chunk=True, score=0.9993266257110797)


Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.93 ms /     1 runs   (   98.93 ms per token,    10.11 tokens per second)
llama_print_timings:       total time =   101.28 ms
Llama.generate: prefix-match hit


Completion(text='He', start=461, stop=463, name=poem, chunk=True, score=0.999714360414436)
Completion(text=' f', start=463, stop=465, name=poem, chunk=True, score=0.9800670127665813)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    88.36 ms /     1 runs   (   88.36 ms per token,    11.32 tokens per second)
llama_print_timings:       total time =    90.64 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    89.32 ms /     1 runs   (   89.32 ms per token,    11.20 tokens per second)
llama_print_timings:       total time =    91.63 ms
Llama.generate: prefix-match hit


Completion(text='ights', start=471, stop=476, name=poem, chunk=True, score=0.9999741084180422)
Completion(text=' for', start=474, stop=478, name=poem, chunk=True, score=0.9999758979915181)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    85.85 ms /     1 runs   (   85.85 ms per token,    11.65 tokens per second)
llama_print_timings:       total time =    88.15 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1459.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   125.94 ms /     1 runs   (  125.94 ms per token,     7.94 tokens per second)
llama_print_timings:       total time =   128.31 ms
Llama.generate: prefix-match hit


Completion(text=' justice', start=486, stop=494, name=poem, chunk=True, score=0.9982909076408045)
Completion(text=',', start=480, stop=481, name=poem, chunk=True, score=0.999924180995957)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1418.44 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    87.56 ms /     1 runs   (   87.56 ms per token,    11.42 tokens per second)
llama_print_timings:       total time =    89.87 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   116.64 ms /     1 runs   (  116.64 ms per token,     8.57 tokens per second)
llama_print_timings:       total time =   119.01 ms
Llama.generate: prefix-match hit


Completion(text=' without', start=495, stop=503, name=poem, chunk=True, score=0.9975758478711025)
Completion(text=' fear', start=497, stop=502, name=poem, chunk=True, score=0.9991775885407321)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1416.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   109.78 ms /     1 runs   (  109.78 ms per token,     9.11 tokens per second)
llama_print_timings:       total time =   112.12 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    85.98 ms /     1 runs   (   85.98 ms per token,    11.63 tokens per second)
llama_print_timings:       total time =    88.30 ms
Llama.generate: prefix-match hit


Completion(text='.', start=494, stop=495, name=poem, chunk=True, score=0.992289784113582)
Completion(text='
', start=495, stop=496, name=poem, chunk=True, score=0.9982222702801263)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    84.48 ms /     1 runs   (   84.48 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =    86.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1420.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.20 ms /     1 runs   (   98.20 ms per token,    10.18 tokens per second)
llama_print_timings:       total time =   100.50 ms
Llama.generate: prefix-match hit

llama_pri

Completion(text='
', start=496, stop=497, name=poem, chunk=True, score=0.9322552337045164)
Completion(text='User', start=503, stop=507, name=poem, chunk=True, score=0.6607935006182248)
> [0;32m/Users/nick/Projects/keymaker/keymaker/constraints/stops.py[0m(36)[0;36mconstrain_tokens[0;34m()[0m
[0;32m     34 [0;31m        [0;32mif[0m [0mmatch[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     35 [0;31m            [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 36 [0;31m            [0;32mif[0m [0;32mnot[0m [0mself[0m[0;34m.[0m[0minclude[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     37 [0;31m                [0;32mreturn[0m [0mcompletion_text[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     38 [0;31m            [0;32mreturn[0m [0mcompletion_text[0m [0;34m+[0m [0mmatch[0m[0;34m.[0m[0mgroup[0m[0;34m([0m[0;34m'stop'[0m[0;34m)[0m[0;34m

Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1422.48 tokens per second)
llama_print_timings: prompt eval time =  1858.64 ms /    25 tokens (   74.35 ms per token,    13.45 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1892.65 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.54 ms /     1 runs   (   99.54 ms per token,    10.05 tokens per second)
llama_print_timings:       total time =   103.78 ms
Llama.gene

Completion(text='1', start=2, stop=3, name=math, chunk=True, score=0.9999898466580307)
Completion(text='5', start=3, stop=4, name=math, chunk=True, score=0.9999977481802185)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    99.11 ms /     1 runs   (   99.11 ms per token,    10.09 tokens per second)
llama_print_timings:       total time =   103.22 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    79.50 ms /     1 runs   (   79.50 ms per token,    12.58 tokens per second)
llama_print_timings:       total time =    83.55 ms
Llama.generate: prefix-match hit


Completion(text='1', start=4, stop=5, name=math, chunk=True, score=0.8002243374889149)
Completion(text='1', start=5, stop=6, name=math, chunk=True, score=0.17281548957637063)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1459.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   118.60 ms /     1 runs   (  118.60 ms per token,     8.43 tokens per second)
llama_print_timings:       total time =   123.08 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1367.99 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   114.79 ms /     1 runs   (  114.79 ms per token,     8.71 tokens per second)
llama_print_timings:       total time =   119.16 ms
Llama.generate: prefix-match hit


Completion(text='5', start=6, stop=7, name=math, chunk=True, score=0.34884385433088655)
Completion(text='1', start=7, stop=8, name=math, chunk=True, score=0.9466990929653938)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1406.47 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   112.42 ms /     1 runs   (  112.42 ms per token,     8.90 tokens per second)
llama_print_timings:       total time =   116.79 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    89.71 ms /     1 runs   (   89.71 ms per token,    11.15 tokens per second)
llama_print_timings:       total time =    93.89 ms
Llama.generate: prefix-match hit


Completion(text='1', start=8, stop=9, name=math, chunk=True, score=0.7421705918820747)
Completion(text='5', start=9, stop=10, name=math, chunk=True, score=0.9655507163896916)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.73 ms /     1 runs   (    0.73 ms per token,  1375.52 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   113.61 ms /     1 runs   (  113.61 ms per token,     8.80 tokens per second)
llama_print_timings:       total time =   117.90 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   115.03 ms /     1 runs   (  115.03 ms per token,     8.69 tokens per second)
llama_print_timings:       total time =   119.21 ms
Llama.generate: prefix-match hit


Completion(text='1', start=10, stop=11, name=math, chunk=True, score=0.9633834555113477)
Completion(text='1', start=11, stop=12, name=math, chunk=True, score=0.9274791769050519)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1459.85 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   101.80 ms /     1 runs   (  101.80 ms per token,     9.82 tokens per second)
llama_print_timings:       total time =   105.89 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   130.38 ms /     1 runs   (  130.38 ms per token,     7.67 tokens per second)
llama_print_timings:       total time =   134.49 ms
Llama.generate: prefix-match hit


Completion(text='5', start=12, stop=13, name=math, chunk=True, score=0.9665881741303927)
Completion(text='1', start=13, stop=14, name=math, chunk=True, score=0.990830196891143)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   106.69 ms /     1 runs   (  106.69 ms per token,     9.37 tokens per second)
llama_print_timings:       total time =   110.82 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1432.66 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    97.25 ms /     1 runs   (   97.25 ms per token,    10.28 tokens per second)
llama_print_timings:       total time =   101.44 ms
Llama.generate: prefix-match hit


Completion(text='1', start=14, stop=15, name=math, chunk=True, score=0.990096559829266)
Completion(text='5', start=15, stop=16, name=math, chunk=True, score=0.9384378390901517)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1440.92 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   107.09 ms /     1 runs   (  107.09 ms per token,     9.34 tokens per second)
llama_print_timings:       total time =   111.19 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1434.72 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    90.99 ms /     1 runs   (   90.99 ms per token,    10.99 tokens per second)
llama_print_timings:       total time =    95.04 ms
Llama.generate: prefix-match hit


Completion(text='1', start=16, stop=17, name=math, chunk=True, score=0.997790776943576)
Completion(text='1', start=17, stop=18, name=math, chunk=True, score=0.9695688050642101)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1449.28 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   110.33 ms /     1 runs   (  110.33 ms per token,     9.06 tokens per second)
llama_print_timings:       total time =   114.48 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1420.45 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   122.53 ms /     1 runs   (  122.53 ms per token,     8.16 tokens per second)
llama_print_timings:       total time =   126.61 ms
Llama.generate: prefix-match hit


Completion(text='5', start=18, stop=19, name=math, chunk=True, score=0.9407340646661422)
Completion(text='1', start=19, stop=20, name=math, chunk=True, score=0.9983559445379621)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.69 ms /     1 runs   (    0.69 ms per token,  1440.92 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   110.42 ms /     1 runs   (  110.42 ms per token,     9.06 tokens per second)
llama_print_timings:       total time =   114.49 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1430.62 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    90.51 ms /     1 runs   (   90.51 ms per token,    11.05 tokens per second)
llama_print_timings:       total time =    94.61 ms
Llama.generate: prefix-match hit


Completion(text='1', start=20, stop=21, name=math, chunk=True, score=0.9767956729293311)
Completion(text='5', start=21, stop=22, name=math, chunk=True, score=0.9063123984733876)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1424.50 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   100.39 ms /     1 runs   (  100.39 ms per token,     9.96 tokens per second)
llama_print_timings:       total time =   104.47 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1436.78 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    94.67 ms /     1 runs   (   94.67 ms per token,    10.56 tokens per second)
llama_print_timings:       total time =    98.78 ms
Llama.generate: prefix-match hit


Completion(text='1', start=22, stop=23, name=math, chunk=True, score=0.9984156885464268)
Completion(text='1', start=23, stop=24, name=math, chunk=True, score=0.9602804527614894)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1428.57 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    98.36 ms /     1 runs   (   98.36 ms per token,    10.17 tokens per second)
llama_print_timings:       total time =   102.58 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    92.90 ms /     1 runs   (   92.90 ms per token,    10.76 tokens per second)
llama_print_timings:       total time =    97.00 ms
Llama.generate: prefix-match hit


Completion(text='5', start=24, stop=25, name=math, chunk=True, score=0.9391089756408056)
Completion(text='1', start=25, stop=26, name=math, chunk=True, score=0.9987798325807007)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.70 ms /     1 runs   (    0.70 ms per token,  1426.53 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   101.81 ms /     1 runs   (  101.81 ms per token,     9.82 tokens per second)
llama_print_timings:       total time =   105.90 ms
Llama.generate: prefix-match hit


Completion(text='1', start=26, stop=27, name=math, chunk=True, score=0.9627398958975784)
Completion(text='1', start=2, stop=3, name=fin, chunk=True, score=0.9999998834967918)
Completion(text='6', start=3, stop=4, name=fin, chunk=True, score=0.9999999394308399)



llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1404.49 tokens per second)
llama_print_timings: prompt eval time =   986.05 ms /    13 tokens (   75.85 ms per token,    13.18 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1004.86 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =  9736.54 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1414.43 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   138.64 ms /     1 runs   (  138.64 ms per token,     7.21 tokens per second)
llama_print_timings:       total time =   142.81 ms


<IPython.core.display.Javascript object>

In [8]:
# Display the completed prompt. Its repr shows the whole conversation text with
# the generated completions (poem, math answer, final answer) filled in.
filled_in

Prompt('Time: 2023-07-21 23:02:54
User: Hi, my name is Nick.
Assistant: Hello, Nick!
User: Can you write me a poem about a superhero named pandaman being a friend to Nick?
Assistant: Of course, I'd be happy to help! Here's a poem for you:
Pandaman, the hero of the land,
A friend to Nick, his heart so grand.
With powers beyond compare,
He fights for justice, without fear.

Nick and Pandaman, a bond so strong,
Together they fight, where right is wrong.
Their friendship grows with each passing day,
As they stand together, come what may.

Pandaman, the hero of the night,
A friend to Nick, shining so bright.
With powers beyond compare,
He fights for justice, without fear.

User
User: What is 10+5?
Assistant: The answer is 10+5=Duh me no know.

The final answer is Duh me no know.!
')

<IPython.core.display.Javascript object>

In [9]:
# Look up the completion named "poem" on the completed prompt. It displays as a
# Completion carrying the generated text, its start/stop offsets, and a score.
filled_in.completions.poem

Completion(text=' Of course, I'd be happy to help! Here's a poem for you:
Pandaman, the hero of the land,
A friend to Nick, his heart so grand.
With powers beyond compare,
He fights for justice, without fear.

Nick and Pandaman, a bond so strong,
Together they fight, where right is wrong.
Their friendship grows with each passing day,
As they stand together, come what may.

Pandaman, the hero of the night,
A friend to Nick, shining so bright.
With powers beyond compare,
He fights for justice, without fear.

User', start=174, stop=673, name=poem, chunk=False, score=7.346943203117714e-15)

<IPython.core.display.Javascript object>

In [10]:
# Look up the completion named "math" on the completed prompt.
# NOTE(review): this displays as a plain string rather than a Completion repr,
# and its value differs from the digit chunks streamed for "math" during
# generation -- presumably a fallback supplied by the prompt-building cell;
# confirm against that cell.
filled_in.completions.math

'Duh me no know.'

<IPython.core.display.Javascript object>