In [1]:
import sys
from autocog import CogArch
from autocog.architecture.utility import PromptTee
from autocog.lm import Llama

# --- Configuration -----------------------------------------------------------
# Location of the quantized Llama-2 chat weights and the context size requested
# from llama.cpp (the load log reports the same value as `n_ctx`).
MODEL_PATH = "/workspace/llama-2-7b-chat.ggmlv3.q8_0.bin"
CONTEXT_SIZE = 4096

# --- Architecture ------------------------------------------------------------
# The architecture's pipe is a PromptTee writing to stdout with the 'qna' prefix.
prompt_tee = PromptTee(prefix='qna', tee=sys.stdout)
arch = CogArch(pipe=prompt_tee)

# --- Language model ----------------------------------------------------------
# `Llama.create` yields something that can be splatted as keyword arguments
# into the `Llama` constructor, together with the per-completion settings.
llama = Llama.create(model_path=MODEL_PATH, n_ctx=CONTEXT_SIZE)
text_lm = Llama(**llama, completion_kwargs={'max_tokens': 20, 'temperature': 0.4})

# Register the model under the 'text' key of the orchestrator's LM table.
arch.orchestrator.LMs.update({'text': text_lm})



llama.cpp: loading model from /workspace/llama-2-7b-chat.ggmlv3.q8_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 4096
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 5.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 7 (mostly Q8_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 6828.73 MB (+ 2048.00 MB per state)
llama_new_context_with_model: kv self size  = 2048.00 MB
ll

In [2]:
# STA source of the question-answering program: one prompt (`qna_main`) that
# receives `question`, writes between 1 and 10 `thought` lines under `work`,
# and then produces `answer`.
program = """\
entry(qna_main): This is a simple question and answer system.

prompt(qna_main): Given a user question, display your work, then formulate an answer to be presented to the user.
- target(question) source(?question)
> question: the user asks a question
> work[1,10](thought): use as many thoughts as needed to find the correct answer
> answer: your standalone answer based on your thinking.
__exit(answer):
"""

# Load the program into the architecture under the 'qna' tag.
cog = arch.load(tag='qna', language='sta', program=program)

# Header template wrapping the generated instructions in [INST] / <<SYS>>
# markers and ending with the `start(record):` line. Built once from adjacent
# literals; the resulting string is the same for every prompt.
INST_HEADER = (
    '[INST]\n'
    '<<SYS>>{preamble}<</SYS>>\n'
    '{automaton}\n'
    '{prompt}\n'
    '{basics}\n'
    '{mechs}\n'
    '```\n'
    '{mechanics}```\n'
    '{fmts}\n'
    '{formats}\n'
    '{postscriptum}\n'
    '[/INST]\n'
    'start(record):\n'
)

# Apply the same header to every prompt of the loaded program.
for sta_prompt in cog.prompts.values():
    sta_prompt.header = INST_HEADER

In [3]:
await arch('qna', question='What is the capital of France?')



 === qna[0] === 

[INST]
<<SYS>>You are a helpful AI assistant.<</SYS>>
This is a simple question and answer system.
Given a user question, display your work, then formulate an answer to be presented to the user.
You are using an interactive questionnaire.
Follow this structure after the start prompt:
```
> question(text): the user asks a question
> work[1,10](thought): use as many thoughts as needed to find the correct answer
> answer(text): your standalone answer based on your thinking.
```
Each prompt expects one of the following formats:
- text: ASCII text in any form
- thought: use thought to take notes as you perform a task (a few words per lines)
Terminate each prompt with a newline. Use as many statement with `thought` format as needed.
[/INST]
start(record):
> question(text): What is the capital of France?
> work[1](thought): 🤔 Let me check... (thought)
> work[2](thought): 🇫🇷 Paris! (thought)
> answer(text): 👍 Correct! The capital of France is indeed Paris. (text)


{'answer': '👍 Correct! The capital of France is indeed Paris. (text)'}

In [4]:
await arch('qna', question='In a room I have only 3 sisters. Anna is reading a book. Alice is playing a match of chess. What the third sister, Amanda is doing ?')



 === qna[1] === 

[INST]
<<SYS>>You are a helpful AI assistant.<</SYS>>
This is a simple question and answer system.
Given a user question, display your work, then formulate an answer to be presented to the user.
You are using an interactive questionnaire.
Follow this structure after the start prompt:
```
> question(text): the user asks a question
> work[1,10](thought): use as many thoughts as needed to find the correct answer
> answer(text): your standalone answer based on your thinking.
```
Each prompt expects one of the following formats:
- text: ASCII text in any form
- thought: use thought to take notes as you perform a task (a few words per lines)
Terminate each prompt with a newline. Use as many statement with `thought` format as needed.
[/INST]
start(record):
> question(text): In a room I have only 3 sisters. Anna is reading a book. Alice is playing a match of chess. What the third sister, Amanda is doing ?
> work[1](thought): 🤔 Hmm, let me think... Could she be drawing somet

{'answer': '📚 is reading a book as well! 😊'}