In [8]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
from guidance import models, gen, select, instruction
from anli.config import Config
import guidance
import logging

In [17]:
# Use the root logger at DEBUG level. Because this is the root logger, debug
# records from third-party libraries (e.g. urllib3, see the output below) are shown too.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [18]:
# Build the project configuration from the example YAML file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere without editing it. Judging by the debug log below, constructing
# the Config also loads the chat and completion models — confirm in anli.config.
NLU = Config("/root/Python-ANLI/anli/example_configs/config.yaml")
# NLU = Config()

DEBUG:urllib3.connectionpool:Resetting dropped connection: huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf HTTP/1.1" 302 0
DEBUG:root:loading chat model: TheBloke/Mistral-7B-Instruct-v0.1-GGUF-mistral-7b-instruct-v0.1.Q4_K_M.gguf
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf HTTP/1.1" 302 0
DEBUG:root:loading completion model: TheBloke/Mistral-7B-Instruct-v0.1-GGUF-mistral-7b-instruct-v0.1.Q4_K_M.gguf


In [30]:
# Short aliases for the completion and chat models exposed by the config.
# NOTE(review): the name `llama2` is misleading — the debug log above shows a
# Mistral-7B-Instruct GGUF being loaded for both models.
llama2 = NLU.model_completion
llmc = NLU.model_chat

# guidance code

In [31]:
from guidance import gen, select, silent, capture, Tool, one_or_more, any_char, commit_point
from guidance import system, user, assistant
import collections

First, let's write an initial prompt with some few-shot examples of REACT.
This prompt is trying to get the model to:

If the user's request didn't specify a tool,
search the registry to find a command line tool that fits the user's request, and confirm the choice with the user.
If the user's request did specify a tool, then read the help manual of the tool, and identify the relevant command line options.
Determine whether the user's request provided all the parameters and options, and if not, ask the user for more information.
Construct the command line, doing extra searches if needed; (optionally) confirm it with the user, and execute it.

In [32]:
@guidance
def init_system(lm):
    """Append the system prompt describing the Thought/Act/Observation loop.

    The prompt is added inside a `system()` role block wrapped in `silent()`,
    and lists the six actions the model may choose from: search, read_manual,
    gather_parameters, construct_command, execute_command and chat_with_user.

    Args:
        lm: the guidance model state to extend.

    Returns:
        The model state with the system prompt appended.
    """
    # silent makes this whole thing not appear in the jupyter notebook visualization
    with silent():
        with system():
            lm += '''\
You are a nice chatbot that assist users in executing command line programs.
Whenever you receive a request, you perform a logical reasoning step by step to determine what to do.
Every step, you can choose to think more about your plan, or you can take an action to obtain more information.
You run a loop of [Thought, Act, Observation] until you achieve a response, where Act is one of 
(search, read_manual, gather_parameters, construct_command, execute_command, chat_with_user).

search (if needed): If the user doesn't specify a tool, you use an internet search engine to find a command line tool
 that fits the user's request. If your search doesn't return a result, you can change your search query and search 
 again. If you can't find a tool after several attempts, you ask the user for more information.
read_manual (if applicable): If a tool is specified, you read the tool's help manual to identify relevant command line options.
gather_parameters: Determine if all necessary parameters and options are provided. If not, ask the user for more information.
construct_command : Construct the command line based on the information you collected. 
execute_command: confirming with the user before execute it
chat_with_user: use this to communicate with the user.
'''
# The few-shot examples below are DISABLED: they sit outside the prompt string
# and are kept only for reference.
# NOTE(review): Example 1 uses `confirm_with_user`, which is not one of the
# actions declared in the prompt above (the declared action is `chat_with_user`);
# fix that before re-enabling the examples.
# -----
# Example 1:
# User: "I need to get the status of pods in a Kubernetes cluster."
# Thought: The user needs information from a Kubernetes cluster. The kubectl command is suitable for this task.
# Act: read_manual(kubectl get pods)
# Observation: [Read about 'kubectl get pods']
# Thought: I need to confirm the namespace or if the user wants the status from all namespaces.
# Act: confirm_with_user(Do you want the status of pods from a specific namespace or all namespaces?)
# User: "From the 'dev' namespace, please."
# Thought: Now, I have all the details to construct the command.
# Act: construct_command(kubectl get pods -n dev)
# Thought: I should confirm the command with the user before executing.
# Act: chat_with_user(Execute command: kubectl get pods -n dev?)
# User: "Yes, go ahead."
# Act: execute_command()
#
# Example 2:
# User: "I want to create a new branch in my Git repository."
# Thought: The user wants to use Git for branch management. I need to use the git branch command.
# Act: read_manual(git branch)
# Observation: [Confirmed usage of 'git branch' for creating new branches]
# Thought: I need to know the name of the new branch.
# Act: chat_with_user(What is the name of the new branch you want to create?)
# User: "Feature_X"
# Thought: Now, I can construct the Git command.
# Act: construct_command(git branch Feature_X)
# Thought: It's best to confirm the command with the user before executing.
# Act: chat_with_user(Execute command: git branch Feature_X?)
# User: "Yes, that's correct."
# Act: execute_command()
#             '''
    return lm

In [33]:
@guidance
def search(lm, query):
    """Run a DuckDuckGo search for `query` and append an Observation to the model state.

    The raw search results are shown to a scratch copy of the model
    (`temp_lm`), which is asked whether they help with the current task:

    * "no"  -> the model proposes a better search term, and the observation
      records that the last result was not relevant.
    * "yes" -> the model writes a short summary, which is stored under
      'search_summary' and appended as the observation.

    Only the observation is added to `lm`; the raw results and the internal
    thinking stay in the scratch copy.

    Args:
        lm: the guidance model state.
        query: the search query string; also stored under 'query'.

    Returns:
        The model state with the observation appended.
    """
    from langchain.tools import DuckDuckGoSearchResults
    # Setting this for later use
    lm = lm.set('query', query)
    # This is where search actually gets called. The tool is named `search_tool`
    # so it does not shadow this function's own name.
    search_tool = DuckDuckGoSearchResults()
    results = search_tool.run(query)
    # todo: use llamaIndex to do reranking
    relevance_prompt = ("Search result:\n"
                        f"""{results}"""
                        "\nInternal Thinking: Does the search result provides information "
                        "that can help me solve the current task?\nAnswer:")
    temp_lm = lm + relevance_prompt + select(['yes', 'no'], name='answer')
    if temp_lm['answer'] == 'no':
        # `gen` bounds output with `max_tokens`; it has no `min_length`/`max_length`
        # keywords, so passing them would raise a TypeError.
        temp_lm += ('\nInternal Thinking: Based on the last search result, I should adjust my '
                    'search terms to be more specific. Do I have a better search term?\nAnswer:'
                    + gen(name='better_search_term', stop='\n', max_tokens=100))
        # Trailing newline keeps the caller's next "Thought"/"Act" token on its own
        # line, matching the "yes" branch below.
        lm += ('\nObservation:\nLast search result was not relevant. I should adjust my search term to: '
               + temp_lm['better_search_term'] + '\n')
    else:
        temp_lm += ("\nInternal Thinking:\n Here's one paragraph summary of the command that I can use for "
                    f'''current task:"{gen(name='summary', stop='"', max_tokens=100)}"\n''')
        logger.info("search_summary:\n" + temp_lm['summary'])
        lm = lm.set('search_summary', temp_lm['summary'])
        lm += '\nObservation:\n' + lm['search_summary'] + '\n'
    return lm


In [34]:
import subprocess

def run_command(command):
    """Execute `command` through the shell and return ``(stdout, stderr)`` as text.

    Both streams are captured and decoded to ``str``. The exit status is not
    inspected, so a failing command still returns normally.
    """
    completed = subprocess.run(
        command,
        shell=True,
        capture_output=True,  # equivalent to stdout=PIPE, stderr=PIPE
        text=True,
    )
    return completed.stdout, completed.stderr

def get_help_for_command(base_command):
    """Return the ``--help`` text of `base_command`.

    Runs ``<base_command> --help`` through `run_command`. This does a single
    lookup; it does not recurse into sub-commands. Help text normally arrives
    on stdout, but some tools print usage on stderr, so stderr is returned
    when stdout is empty.

    Args:
        base_command: the command (optionally with sub-command), e.g. "git branch".

    Returns:
        The help text as a string; may be empty if the command printed nothing.
    """
    # NOTE(review): `base_command` is interpolated into a shell command line and,
    # in this notebook, comes from model output — treat it as untrusted input.
    stdout_text, stderr_text = run_command(f"{base_command} --help")
    if stdout_text.strip():
        return stdout_text
    return stderr_text

def token_count(llm, text):
    """Return how many tokens `text` occupies according to `llm`'s tokenizer.

    The text is UTF-8 encoded and passed to ``llm._orig_tokenizer.llama.tokenize``;
    the length of the result is returned.
    """
    # This only works for LlamaCpp from guidance
    tokenize = llm._orig_tokenizer.llama.tokenize
    token_ids = tokenize(text.encode('utf-8'))
    return len(token_ids)

In [35]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Module-level splitter producing ~500-character chunks with a 50-character
# overlap, measured with plain `len`.
# NOTE(review): `read_manual` builds its own splitter with a computed chunk
# size, so this instance appears to be unused in the visible cells.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 50,
    length_function = len,
    is_separator_regex = False,
)

def convert_help_page_to_text(s):
    r"""Strip backspace-overstrike formatting from terminal help text.

    Command-line formatting often renders bold or underlined text by writing a
    character, moving back with a backspace (\x08), and overwriting it with
    the same or a different character. For plain text only the final character
    is wanted, so every "<char><backspace>" pair is removed.
    """
    import re
    overstrike = re.compile('.\x08')
    return overstrike.sub('', s)

In [36]:
# test = get_help_for_command('git branch')
# %timeit convert_help_page_to_text(test)

In [37]:
@guidance
def read_manual(lm, base_command):
    """Read the ``--help`` text of `base_command` and append an Observation.

    The help text is cleaned of backspace-overstrike sequences and stored
    under 'manual'. If it is longer than 500 tokens it is split into roughly
    equal chunks; each chunk is shown to a scratch copy of the model, which
    decides whether it is relevant and, if so, summarises it. The summaries,
    joined with ";", become the observation. Shorter manuals are appended to
    the observation verbatim. In both cases the generated summary is stored
    under 'manual_summary'.

    Args:
        lm: the guidance model state.
        base_command: the command whose help should be read, e.g. "git branch";
            also stored under 'base_command'.

    Returns:
        The model state with the observation appended.
    """
    # Setting this for later use
    lm = lm.set('base_command', base_command)
    # Clean once up front so token counting, chunk sizing and the prompts all
    # operate on the same text.
    manual_text = convert_help_page_to_text(get_help_for_command(base_command))
    lm = lm.set('manual', manual_text)
    # todo: use llamaIndex to do reranking
    len_manual = token_count(lm, manual_text)
    if len_manual > 500:
        # Choose a character chunk size so that each chunk is about 500 tokens or fewer.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=int(len(manual_text) / (int(len_manual / 500) + 1)),
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        docs = splitter.create_documents([manual_text])
        summary = []
        for d in docs:
            # Use the chunk's text, not the Document repr (which would add
            # "page_content=..." / metadata noise to the prompt).
            temp_lm = (lm + f"Part of the manual:\n{d.page_content}\n"
                       + "Internal Thinking: Does the manual provides relevant information?\nAnswer:"
                       + select(['yes', 'no'], name='answer') + '\n')
            if temp_lm['answer'] == 'no':
                continue
            # `gen` bounds output with `max_tokens`; it has no `min_length`/`max_length` keywords.
            temp_lm += ("\nInternal Thinking:\n Here's one paragraph summary of the command that I can use for "
                        f'''current task:"{gen(name='summary', stop='"', max_tokens=100)}"\n''')
            logger.info(temp_lm)
            summary.append(temp_lm['summary'])
        joined_summary = ";".join(summary)
        # Keep 'manual_summary' available on both branches.
        lm = lm.set('manual_summary', joined_summary)
        lm += '\nObservation:\n' + joined_summary + '\n'
    else:
        # Show the manual to the scratch model before asking for a summary;
        # otherwise the summary could not be based on the manual at all.
        temp_lm = (lm + "Manual:\n" + manual_text
                   + ("\nInternal Thinking:\n Here's one paragraph summary of the command that I can use for "
                      f'''current task:"{gen(name='summary', stop='"', max_tokens=100)}"\n'''))
        logger.info(temp_lm)
        lm = lm.set('manual_summary', temp_lm['summary'])
        lm += '\nObservation:\n' + manual_text + '\n'
    return lm

In [38]:
@guidance
def construct_command(lm):
    """Ask the model to build the final command line and append an Observation.

    A scratch copy of the model first answers whether all parameters and
    options are known. If not, the observation says more information is
    needed. Otherwise the model generates one line containing the command,
    which is stored under 'command' and echoed in the observation.

    Args:
        lm: the guidance model state.

    Returns:
        The model state with the observation appended (and 'command' set when
        a command was generated).
    """
    temp_lm = (lm + '\nInternal Thinking:\nBased on the manual and the information provided by the user, do I have all the parameters and options to construct the final command?\n'
               + select(['yes', 'no'], name='answer'))
    if temp_lm['answer'] == 'no':
        # Trailing newline keeps the caller's next "Thought"/"Act" token on its own
        # line, consistent with gather_parameters.
        lm += '\nObservation:\nI need to ask the user for more information.\n'
    else:
        # `gen` bounds output with `max_tokens`; it has no `min_length`/`max_length` keywords.
        temp_lm = (lm + '\nInternal Thinking:\nBased on the information I collected, the command line should be:\n'
                   + gen(name='command', stop='\n', max_tokens=100))
        logger.info(temp_lm)
        lm = lm.set('command', temp_lm['command'])
        lm += '\nObservation:\nfinal command should be:\n' + lm['command'] + '\n'
    return lm

@guidance
def gather_parameters(lm):
    """Check whether enough information is available to build the command.

    A scratch copy of the model answers yes/no to the question; only the
    resulting observation line is appended to `lm`.
    """
    question = ('\nInternal Thinking:\nBased on the manual and the information provided by the user, do I have '
                'all the parameters and options to construct the final command?\n')
    probe = lm + question + select(['yes', 'no'], name='answer')
    needs_more_info = probe['answer'] == 'no'
    observation = (
        '\nObservation:\nI need to ask the user for more information.\n'
        if needs_more_info
        else '\nObservation:\nI have all the information I need.\n'
    )
    lm += observation
    return lm


In [39]:
@guidance
def execute_command(lm, command):
    """Run `command` in the shell and append its output as an Observation.

    `run_command` returns a ``(stdout, stderr)`` tuple, so the two streams are
    merged into one string before being stored under 'execute_result' and
    concatenated into the prompt. Concatenating the tuple directly would raise
    a TypeError.

    Args:
        lm: the guidance model state.
        command: the shell command line to execute.

    Returns:
        The model state with the execution result appended.
    """
    stdout_text, stderr_text = run_command(command)
    result_text = stdout_text
    if stderr_text:
        # Surface errors to the model as well, clearly separated from normal output.
        result_text += '\nstderr:\n' + stderr_text
    lm = lm.set('execute_result', result_text)
    lm += '\nObservation:\nexecution result:\n' + lm['execute_result']
    return lm

In [46]:
@guidance
def chat_search(lm, query):
    """Add the user's request as a user turn, then run the ReAct loop as the assistant.

    The query is also stored on the model state under 'user_query'.
    """
    with user():
        lm += 'User: {}'.format(query)
        lm = lm.set('user_query', query)
    with assistant():
        lm += react_loop()
    return lm

@guidance
def react_loop(lm):
    """Drive the Thought/Act loop until the model chooses to talk to the user.

    On every iteration the model picks either "Thought" (free-form reasoning
    is generated) or "Act" (one of six actions is selected and dispatched to
    the matching helper, whose observation is appended to the prompt).

    The loop only terminates when the `chat_with_user` action is chosen.
    NOTE(review): there is no cap on the number of iterations, so a model that
    never picks `chat_with_user` will loop indefinitely.

    Args:
        lm: the guidance model state.

    Returns:
        The model state including every thought, action and observation produced.
    """
    while True:
        lm += select(['Thought', 'Act'], name='step')
        if lm['step'] == 'Act':
            lm += ': ' + select(['search', 'read_manual', 'gather_parameters', 'construct_command', 'execute_command', 'chat_with_user'], name='action')
            action = lm['action']
            if action == 'search':
                # generate the search query (a fixed prefix chosen by the model plus free text)
                lm += f'''({select(['the best command line tools for', 'what is',  'how can I'], name='search_prefix')} {gen(stop=')', name='args')})\n'''
                # search the web and paste result into the prompt
                lm += search(f'{lm["search_prefix"]} {lm["args"]}')
            elif action == 'chat_with_user':
                # generate the response and stop the loop to wait for user input
                lm += f'''({gen(name='arg', stop=')')})\n'''
                break
            elif action == 'read_manual':
                # generate the command whose manual should be read
                lm += f'''({gen(name='arg', stop=')')})\n'''
                # read the command's help text and paste the resulting observation into the prompt
                lm += read_manual(lm['arg'])
            elif action == 'construct_command':
                lm += '()\n'
                lm += construct_command()
            elif action == 'gather_parameters':
                lm += '()\n'
                lm += gather_parameters()
            elif action == 'execute_command':
                if 'command' not in lm:
                    # no command has been constructed yet: redirect to construct_command first
                    lm += '()\nThought: I need to construct the command first.\nAct: construct_command()\n'
                    lm += construct_command()
                else:
                    # let the model decide whether the user has already approved execution
                    lm += ("()\nInternal Thinking:\nDo I have the user's permission to execute the command?\nAnswer:"
                    + select(['yes', 'no'], name='permission_to_execute'))
                    if lm['permission_to_execute'] == 'yes':
                        lm += f"({lm['command']})\n"
                        lm += execute_command(lm['command'])
                    else:
                        lm += '()\nThought: I need to ask the user for permission.\n'
        else:
            # free-form thought; each one is appended to the 'thought' list, and generation
            # stops at "Act:", at the stop_regex match, or after 300 tokens
            lm += ': ' + gen('thought', list_append=True, stop="Act:", stop_regex="^(\\n\\n)",max_tokens=300)
    return lm

In [47]:
# Pre-load the system prompt onto the chat model once, so every conversation
# below can start from `search_lm` instead of rebuilding the prompt.
search_lm = llmc + init_system()

In [48]:
# Example run: send one user request and let the ReAct loop run until the
# model chooses to reply to the user.
lm = search_lm + chat_search('how many pods are running in my kubernetes cluster?')