# Finite Thoughts Automaton

This notebook focuses on building a Finite Thoughts Automaton (FTA), a lower-level automaton than the STA, which could be called a "machine model" for a language model (LM).

In [1]:
import os, sys, json
#from autocog.lm import TfLM, OpenAI, Llama
#llama_path = lambda x: "/workspace/models/{}/ggml-model-{}.bin".format(*x)
#llm = Llama(**Llama.create(model_path=llama_path(('7B','q4_0')), n_ctx=2048), max_tokens=20)

In [2]:
from autocog.automatons.fta.automaton import FiniteThoughtAutomaton as FTA
from autocog.automatons.fta.actions import Choose, Complete, Text

# Digits allowed in the first digit group (no '0').
nonzero_digits = list('123456789')

# NOTE(review): `llm` is only assigned in a commented-out line of the first
# cell and in a later cell; on a fresh kernel it is undefined at this point.
fta = FTA(lm=llm)

# Each action is registered under a `uid`; `successors` lists the uids that
# may follow it. For `Choose` actions the successors presumably pair up
# positionally with `choices` -- TODO confirm against the FTA implementation.
subject = fta.create(
    uid='subject', cls=Text,
    successors=['qualifier'],
    text="the speed of light in ",
)
qualifier = fta.create(
    uid='qualifier', cls=Choose,
    successors=['verb', 'verb', 'verb'],
    choices=["km/s", "m/s", "km/h"],
)
verb = fta.create(
    uid='verb', cls=Text,
    successors=['digits_1'],
    text=" is ",
)
digits_1 = fta.create(
    uid='digits_1', cls=Complete,
    successors=['sep_1'],
    length=3, vocab={'texts': nonzero_digits}, stop=[',', '.'],
)
sep_1 = fta.create(
    uid='sep_1', cls=Choose,
    successors=['digits_2', 'digits_4'],
    choices=[',', '.'],
)
digits_2 = fta.create(
    uid='digits_2', cls=Complete,
    successors=['sep_2'],
    length=3, vocab={'texts': ['0'] + nonzero_digits}, stop=[',', '.'],
)
sep_2 = fta.create(
    uid='sep_2', cls=Choose,
    successors=['digits_3', 'digits_4'],
    choices=[',', '.'],
)
digits_3 = fta.create(
    uid='digits_3', cls=Complete,
    successors=['sep_3'],
    length=3, vocab={'texts': ['0'] + nonzero_digits}, stop=['.'],
)
sep_3 = fta.create(
    uid='sep_3', cls=Text,
    successors=['digits_4'],
    text=".",
)
digits_4 = fta.create(
    uid='digits_4', cls=Complete,
    successors=['eol'],
    length=2, vocab={'texts': ['0'] + nonzero_digits}, stop=['\n'],
)
eol = fta.create(
    uid='eol', cls=Text,
    successors=[],
    text="\n",
)

# Render the automaton graph as the cell output.
from autocog.utility.pynb import wrap_graphviz
wrap_graphviz(fta.toGraphViz())



ModuleNotFoundError: No module named 'autocog.lm.local'

In [None]:
# Run the automaton starting from the action registered as 'subject'.
# Presumably a greedy decode over the graph -- confirm against FTA.greedy.
fta.greedy('subject')

In [None]:
# One MMLU record rendered in the questionnaire syntax: the question followed
# by its four numbered choices (lines joined by '\n', no trailing newline).
inputs = [
    "\n".join([
        "> question(text): Scientists wonder how the Egyptian pyramids were built. They think that the huge blocks of stone may have been put into place by pushing them up a sloping pathway. The pathway is which type of simple machine?",
        "> choices[1](text): lever",
        "> choices[2](text): pulley",
        "> choices[3](text): inclined plane",
        "> choices[4](text): wheel and axle",
    ])
]

# Expected answer line for the record above.
# NOTE(review): `formats` below describes `choice` as an index (`1`..`4`), but
# this line carries the choice text instead -- confirm which one is intended.
answer = ["> answer(choice): inclined plane"]

# Description of the questionnaire fields, substituted for `{syntax}`.
syntax = "\n".join([
    "> question(text): The question from the MMLU Exam",
    "> choices[4](text): Four potential answers to the question",
    "> answer(choice): You choose the correct answer.",
])

# Description of the value formats, substituted for `{formats}`.
formats = "\n".join([
    "- text: any text",
    "- thought: short text used to prepare your answers",
    "- choice: index of the correct choice, one of `1`, `2`, `3`, or `4`",
])

# Prompt patterns. Each pattern is a flat list alternating literal prompt text
# (str, may contain `{syntax}`, `{formats}`, `{inputs}`, `{answer}` placeholders
# for `str.format`) with dict "slots" the model has to fill:
#   - a dict with a 'choices' key is a pick-one slot,
#   - a dict with a 'length' key is a free-completion slot.
#
# Fix: the notepad prompts used the misspelled format label `thougth`, which
# does not match the `thought` format declared in `formats`; it is now spelled
# `thought` so the prompt refers to a format the model has been told about.

# Opening shared by every pattern, up to and including the opening code fence.
_MMLU_INTRO = """You are a helpful AI assistant. You are taking the MMLU exam. You are given one question and four choices for the answer. Only one of these choices is the correct answer.
You are using an interactive questionnaire. Follow this syntax after the start prompt:
```
"""

# Full preamble shared by patterns 1 and 2, ending right after `{inputs}\n`.
_MMLU_RECORD = _MMLU_INTRO + """{syntax}
```
Each prompt expects one of the following formats:
{formats}
Terminate each prompt with a newline.

start(record):
{inputs}
"""

patterns = [
    # Pattern 0: the model completes the syntax/format descriptions itself.
    [
        _MMLU_INTRO + "> question(text): ",
        { 'beams' : 3, 'length' : 10, 'stop' : '\n' },
        "\n> choices[4](text): ",
        { 'beams' : 3, 'length' : 10, 'stop' : '\n' },
        "\n> answer(choice): ",
        { 'beams' : 3, 'length' : 10, 'stop' : '\n' },
        "\n```\nEach prompt expects one of the following formats:\n- text: ",
        { 'beams' : 3, 'length' : 10, 'stop' : '\n' },
        "\n- choice: ",
        { 'beams' : 3, 'length' : 10, 'stop' : '\n' },
        "\nTerminate each prompt with a newline.\n\nstart(record):\n{inputs}\n{answer}\n",
    ],
    # Pattern 1: answer directly with one of the four choice indices.
    [
        _MMLU_RECORD + "> answer(choice): ",
        { 'choices' : ['1','2','3','4'] },
    ],
    # Pattern 2: three free-form notepad thoughts, then the choice index.
    [
        _MMLU_RECORD + "> notepad[1](thought):",
        { 'beams' : 1, 'length' : 30, 'stop' : '\n' },
        "\n> notepad[2](thought):",
        { 'beams' : 1, 'length' : 30, 'stop' : '\n' },
        "\n> notepad[3](thought):",
        { 'beams' : 1, 'length' : 30, 'stop' : '\n' },
        "\n> answer(choice): ",
        { 'choices' : ['1','2','3','4'] },
    ],
]

In [None]:
def tree_from_pattern(pattern, **kwargs):
    """Recursively turn a prompt pattern into a tree of nodes.

    The first element of `pattern` becomes the root node and the rest of the
    pattern is built again, independently, for every successor slot of that
    node.

    Args:
        pattern: sequence of pattern elements. A `str` is literal text and is
            formatted with `kwargs`; a dict with a 'choices' key is passed to
            `Choice.build`; a dict with a 'length' key is passed to
            `Complete.build`.
        **kwargs: values substituted into the `{name}` placeholders of the
            literal text elements.

    Returns:
        The root node of the tree, or `None` when `pattern` is empty.

    Raises:
        ValueError: if an element is neither a string nor a dict carrying a
            'choices' or 'length' key (previously this surfaced as an
            `UnboundLocalError` on `tree`).
    """
    # NOTE(review): `Choice` is not among the names imported in the visible
    # cells (only `Choose`, `Complete`, `Text`) -- confirm where it is defined.
    if len(pattern) == 0:
        return None
    current = pattern[0]
    if isinstance(current, str):
        # Literal text is modelled as a choice with a single option.
        tree = Choice.build(choices=[current.format(**kwargs)], norm=False)
    elif 'choices' in current:
        tree = Choice.build(**current, norm=False)
    elif 'length' in current:
        tree = Complete.build(**current)
    else:
        raise ValueError(
            f"Unsupported pattern element {current!r}: expected a str or a dict with a 'choices' or 'length' key"
        )
    # One freshly built subtree per successor slot, so branches share no nodes.
    tree.successors = [ tree_from_pattern(pattern[1:], **kwargs) for _ in tree.successors ]
    return tree

In [None]:
# Build a thought tree from the third pattern (three notepad slots, then the
# answer choice), filling the text placeholders with the definitions above.
# NOTE(review): `FiniteThoughtTree` is not imported in any visible cell --
# confirm where it comes from before running on a fresh kernel.
ftt = FiniteThoughtTree.from_pattern(patterns[2], syntax=syntax, formats=formats, inputs=inputs[0])
# Disabled debug dump; note that it refers to `tree`, not `ftt`.
# print(json.dumps(tree.dict(), indent=4))

In [None]:
import tqdm
# NOTE(review): `Llama` and `llama_path` only appear in commented-out lines of
# the first cell, so this line cannot run on a fresh kernel as written.
llm = Llama(**Llama.create(model_path=llama_path(('7B','q4_0')), n_ctx=2048), max_tokens=20)
# Explore the thought tree with the model; the return value is kept in `res`.
res = ftt.explore(llm)

In [None]:
# Persist the explored tree as JSON.
# `tree` is never assigned at notebook scope; the tree built and explored in
# the cells above is `ftt`, so that is the object dumped here. The `with`
# block makes sure the file is flushed and closed.
with open('ftt.json', 'w') as ftt_file:
    json.dump(ftt.dict(), ftt_file, indent=4)

In [None]:
# Materialise everything yielded by the explored tree's `collect()`.
# `tree` is never assigned at notebook scope; the tree built and explored in
# the cells above is `ftt`.
results = list(ftt.collect())

In [None]:
# Show each result from highest to lowest probability (second field),
# with its text (first field) indented by four spaces on every line.
for r in sorted(results, key=lambda item: item[1], reverse=True):
    print(f"Proba: {r[1]}")
    print('    ' + r[0].replace('\n', '\n    '))

In [None]:
# Dump the whole tree, then list every result by increasing probability.
# `tree` is never assigned at notebook scope; the tree built and explored in
# the cells above is `ftt`.
print(json.dumps(ftt.dict(), indent=4))
for (text,proba,path) in sorted(results,key=lambda x: x[1]):
    print(f'> "{text}": {proba} ({path})')