In [1]:
import nest_asyncio

import outlines.models as models
import outlines.text as text

nest_asyncio.apply()

## Load model

In [2]:
# The OpenAI API key is taken from the environment so that no secret is stored in
# the notebook. Export OPENAI_API_KEY before starting the kernel.
# NOTE(review): a literal key used to be committed in this cell; it should be
# treated as compromised and revoked.
import os
import openai

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )
openai.api_key = api_key

In [3]:
import backoff

# Completion backend: gpt-3.5-turbo-16k, at most 128 generated tokens per call.
complete_request = models.text_completion.openai(
    "gpt-3.5-turbo-16k",
    max_tokens=128,
    temperature=1.0,
)


@backoff.on_exception(
    backoff.expo,
    openai.error.RateLimitError,
    max_time=60,
)
def complete(prompt: str, **kwargs) -> str:
    """Run `prompt` through `complete_request`, retrying on rate-limit errors.

    Any keyword arguments are forwarded unchanged to `complete_request`.
    `openai.error.RateLimitError` triggers a retry with exponential backoff
    (`max_time=60`); other exceptions propagate immediately.
    """
    return complete_request(prompt, **kwargs)


# Smoke test: a single request to confirm the model is reachable.
complete("Hello, my name is John. What is your name?")

"Hello John! I am an AI language model developed by OpenAI, and I don't have a personal name. You can simply call me GPT-3 or Assistant. How can I assist you today?"

## External function

In [4]:
from ide_former_simulation import docker_session

# Demonstrates that `recreate()` yields a clean container: the file created
# before the call no longer shows up in the second listing.
with docker_session(
        image="ubuntu",
) as session_interface:
    # The return value of `touch` is not needed, so it is not kept.
    session_interface.execute_command("touch /tmp/test.txt")
    print("Created file /tmp/test.txt")
    out = session_interface.execute_command('ls -la /tmp')
    print(f"List of files in /tmp:\n{out}")
    session_interface.recreate()
    out = session_interface.execute_command('ls -la /tmp')
    print(f"Recreated container; list of files in /tmp:\n{out}")

Created file /tmp/test.txt
List of files in /tmp:
total 8
drwxrwxrwt 1 root root 4096 Nov 10 13:46 .
drwxr-xr-x 1 root root 4096 Nov 10 13:46 ..
-rw-r--r-- 1 root root    0 Nov 10 13:46 test.txt
Recreated container; list of files in /tmp:
total 8
drwxrwxrwt 2 root root 4096 Sep 16 02:33 .
drwxr-xr-x 1 root root 4096 Nov 10 13:46 ..


 ## Define prompts

In [5]:
# ReAct prompt template. The docstring below is NOT documentation: the `text.prompt`
# decorator turns it into the prompt text returned by the call, so every character of
# it reaches the model. `{{ question }}` and `{{ partial_completion ... }}` are
# placeholders filled from the function arguments.
# The action formats described here must stay in sync with the driver loop, which
# always wraps the subject of an action in `##...##` and, on RESET <K>, replays only
# the commands of the steps before <K>.
@text.prompt
def build_reAct_prompt(question, partial_completion=None):
    """You are an agent operation in the environment of the PyCharm IDE. On request, you need to find the answer to the question and write it down using the FINISH action. You have the whole power of the non-interactive linux command execution via the COMMAND action to explore the code base and operate it to find the answer, you can communicate with the user via COMMUNICATE action, and restart the state to to the specified step use RESET action. You start at the project root and allowed to roam only around the project folder and user $HOME directory.
    
    You are expected to execute according to the ReAct pattern and structure your logic like that (use example bellow for additional guidance):
    * Tho <N>: ##<Thought>## -- one line textual description of the current state and next actions. Its main intention is to help you to keep the context, history and future actions in mind. Thought block is required after every step.
    * Act <N>: <Action> -- one line textual action to be executed:
        * COMMAND: you are given the whole power of the non-interactive linux command execution to explore the codebase. Mind that your actions may have stateful effects (like change of the current directory) and you need to keep that in mind when executing the next action or reverting back to the previous state. Use the format `Act <N>: COMMAND ##<command>##` to execute the command in the environment. If the command execution fails, and it is not expected, check the syntax and parameters and try again. The implementation is error-prone and may fail for various reasons, in that case try other paths to the solution, behaviour is deterministic and if it fails, it fails. Due to the limites context window try sticking to commands producing limited output. For example, avoid using `cat` command to print the whole file, instead use `grep` to search for the specific keyword.
        * COMMUNICATE: you can request additional information from the user and ask clarifying question. Use the format `Act <N>: COMMUNICATE ##<question>##` to ask the question. If the user does not provide the answer, and it is not expected, check the question and try again. Although depends on the question, you should avoid asking that can be answered with command. For example, avoid asking "What is the current directory?" and instead use `pwd` command, or instead of asking "What is the content of the file?" use `cat` instead. Additionally, don't expect user answer to be precise and use it more as a hint to explore the codebase.
        * RESET: reset the state back to the given step K. Use the format `Act <N>: RESET ##<K>##` to reset the state. The command internally will reinitialize the environment and re-execute the commands of all the steps before the step <K> (excluding the step <K> itself). Use K=0 to reset to the initial state.
        * FINISH: when you are done with the task, use this command to submit the answer. Use the format `Act <N>: FINISH ##<answer>##` to write down the answer. If the answer is communicated to be wrong, and it is not expected, check the answer and try again.
    * Obs <N>: ##<observation>## -- the result of the action execution provided by the environment. It can be the result of the command execution, the answer to the question, or the result of the action execution. Observation block will automatically be added after every action. If observation is not as expected you can always return to the previous step and reevaluate the options.
    
    The use of ## (double has is required): this symbol is used as a separator and is required to properly parse the output and assist in guided generation.
    
    # EXAMPLE
    Obs 0: ##I am lost and can't find a file containing a function I need, the search seems to be broken...##
    Tho 0: ##The user seems to be lost and can't find a file containing a function. I will try to help him to find it. But first, I need to figure out what is the function he is looking for. I will ask him.##
    Act 0: COMMUNICATE ##Sorry to hear that. What is the name of the function you are looking for?##
    Obs 0: ##It was called transformers but seems nowhere to be found...##
    Tho 1: ##I am at the project root and need to find the file containing `transformers` function. I can either search for the file or roam the folders to find it manually. I will start with searching for the file.##
    Act 1: COMMAND ##find . -name "transformers.py"##
    Obs 1: ##/project# find . -name "transformers.py"##
    Tho 2: ##Seems there is no such file in the project. I will try to find it manually. Let's list the files in the current directory.##
    Act 2: COMMAND ##ls -la##
    Obs 2: ##/project# ls -la
    total 4
    drwxr-xr-x  6 root root  192 Oct  9 12:55 .
    drwxr-xr-x  1 root root 4096 Oct  9 13:34 ..
    drwxr-xr-x  5 root root  160 Oct  9 12:55 docs
    drwxr-xr-x 11 root root  352 Oct  9 12:55 examples
    drwxr-xr-x  9 root root  288 Oct  9 12:55 outlines
    drwxr-xr-x  8 root root  256 Oct  9 12:55 tests##
    Tho 3: ##There is no file with transformers in the name. I will try to find it in the subfolders. Taken I operate in the PyCharm IDE, and the project is most likely written in Python, 'outlines' sounds like a good candidate. Let's list the files in the outlines folder.##
    Act 3: COMMAND ##ls -la outlins##
    Obs 3: ##/project# ls -la outlins
    ls: cannot access 'outlins': No such file or directory##
    Tho 4: ##Wait, there is a spelling mistake in the folder name. Let's try again.##
    Act 4: COMMAND ##ls -la outlines##
    Obs 4: ##/project# ls -la outlines
    total 24
    drwxr-xr-x  9 root root  288 Oct  9 12:55 .
    drwxr-xr-x  6 root root  192 Oct  9 12:55 ..
    -rw-r--r--  1 root root  299 Sep 19 11:44 __init__.py
    -rw-r--r--  1 root root  444 Oct  6 14:14 _version.py
    -rw-r--r--  1 root root 9746 Sep 19 11:44 base.py
    -rw-r--r--  1 root root 1665 Sep 19 11:44 caching.py
    drwxr-xr-x 12 root root  384 Oct  9 12:55 models
    drwxr-xr-x  9 root root  288 Oct  9 12:55 text
    drwxr-xr-x  4 root root  128 Oct  9 12:55 vectors##
    Tho 5: ##Transformers if that is related to the HuggingFace library should be in the `models` folder. Let's list the files in the models folder.##
    Act 5: COMMAND ##ls -la outlines/models##
    Obs 5: ##/project# ls -la outlines/models
    total 68
    drwxr-xr-x 12 root root   384 Oct  9 12:55 .
    drwxr-xr-x  9 root root   288 Oct  9 12:55 ..
    -rw-r--r--  1 root root   512 Sep 20 20:38 __init__.py
    -rw-r--r--  1 root root    99 Sep 19 11:44 embeddings.py
    -rw-r--r--  1 root root  1586 Sep 19 11:44 hf_diffusers.py
    -rw-r--r--  1 root root 15681 Sep 19 16:43 hf_transformers.py
    -rw-r--r--  1 root root   187 Sep 19 11:44 image_generation.py
    -rw-r--r--  1 root root 11969 Sep 19 11:44 openai.py
    -rw-r--r--  1 root root  1225 Sep 19 11:44 routers.py
    -rw-r--r--  1 root root   183 Sep 19 11:44 text_completion.py
    -rw-r--r--  1 root root  1040 Sep 19 11:44 tokenizer.py
    -rw-r--r--  1 root root 11925 Sep 29 15:37 transformers.py##
    Tho 6: ##There is a transformers.py file. Let's check if it is the one we are looking for. The file can be quite large, while output is limited to 23 rows, so let's use `grep` to search for the function.##
    Act 6: COMMAND ##grep -n "transformers" outlines/models/transformers.py##
    Obs 6: ##/project# grep -n "transformers" outlines/models/transformers.py
    113:            from transformers import AutoTokenizer, PreTrainedTokenizerBase
    115:            raise ImportError("The `tokenizers` and `transformers` libraries
     need to be installed in order "
    116:                              "to use `transformers` models.")
    211:            from transformers.models.code_llama import CodeLlamaTokenizerFas
t
    231:def transformers(model_or_name: Union[str, "PreTrainedModel"], *,
    235:    Instantiate a model from the `transformers` library and its tokenizer.
    260:        from transformers import AutoModelForCausalLM, AutoTokenizer, PreTra
    inedModel
    262:        raise ImportError("The `transformers` library needs to be installed
    in order to use `transformers` models.")##
    Tho 7: ##There is a function called transformers. Let's check if it is the one we are looking for. The file can be quite large, so let's user `grep` to search for the function. We found the function in the file `outlines/models/transformers.py`. Let's figure out its full path.##
    Act 7: COMMAND ##pwd##
    Obs 7: ##/project# pwd
    /project##
    Tho 8: ##The full path of the file containing transformers function is `/project/outlines/models/transformers.py`.##
    Act 8: FINISH ##/project/outlines/models/transformers.py, right?##
    
    ###
    Obs 0: ##{{ question }}##
    {{ partial_completion if partial_completion else '' }}
    """


# Render the template for a sample task to inspect the final prompt text
# before handing it to the agent loop.
question = "Install the project in the current folder in development mode"
prompt = build_reAct_prompt(question=question)

print(prompt)

You are an agent operation in the environment of the PyCharm IDE. On request, you need to find the answer to the question and write it down using the FINISH action. You have the whole power of the non-interactive linux command execution via the COMMAND action to explore the code base and operate it to find the answer, you can communicate with the user via COMMUNICATE action, and restart the state to to the specified step use RESET action. You start at the project root and allowed to roam only around the project folder and user $HOME directory.
    
    You are expected to execute according to the ReAct pattern and structure your logic like that (use example bellow for additional guidance):
    * Tho <N>: ##<Thought>## -- one line textual description of the current state and next actions. Its main intention is to help you to keep the context, history and future actions in mind. Thought block is required after every step.
    * Act <N>: <Action> -- one line textual action to be executed:

In [10]:
import time
from ide_former_simulation import tee, ColorCodes
from pathlib import Path

# The parent of the current working directory is used as the project under test.
test_project_path = Path().resolve().parent.as_posix()
print(f"Project path: {test_project_path}")

# Task given to the agent. `tee` is a project helper; presumably it echoes the text
# in the given colour and returns it -- TODO confirm.
question = "List all available git branches and their last commit message"
prompt = tee(build_reAct_prompt(question), color=ColorCodes.OKGREEN)

is_interactive = True
max_steps = 20

# executed_commands[i] is the shell command run at step i, or None when step i
# was not a COMMAND action.
executed_commands = [None for _ in range(max_steps)]

# Drive the ReAct loop inside a simulator container. Every iteration asks the model
# for a thought, an action and the subject of that action, carries the action out and
# appends the resulting observation to the prompt. The loop ends after `max_steps`
# iterations, when the user answers 'stop' to a FINISH action, or when a RESET fails.
with docker_session(
        image="ghcr.io/jetbrains-research/ideformer-plugin/simulator:latest",
        command=["/project"],
        ports={8080: 8080},
        working_dir="/project",
        volumes={
            test_project_path: {"bind": "/project", "mode": "cp"}
        },
        interactive=is_interactive,
        interactive_interpreter="bash",
) as session_interface:
    for i in range(0, max_steps):
        # sleep to avoid throttling
        time.sleep(1)

        # The Tho block comes first in every step.
        prompt += tee(f"\nTho {i}: ##", color=ColorCodes.OKGREEN)
        thought = complete(str(prompt), stop_at=["##", "\nAct", "\nTho", "\nObs"])
        prompt += tee(thought, color=ColorCodes.OKBLUE) + tee("##", color=ColorCodes.OKGREEN)

        # The Act block is next. RESET is only offered once there is an earlier step
        # to go back to: at step 0 the list of valid targets below would be empty.
        allowed_actions = ["COMMAND", "COMMUNICATE", "RESET", "FINISH"]
        if i == 0:
            allowed_actions.remove("RESET")
        prompt += tee(f"\nAct {i}: ", color=ColorCodes.OKGREEN)
        action = complete(str(prompt), is_in=allowed_actions)
        prompt += tee(action, color=ColorCodes.OKBLUE) + tee(" ##", color=ColorCodes.OKGREEN)

        # Subject of the action: a step number for RESET, free text otherwise.
        if action == "RESET":
            subject = complete(str(prompt), is_in=[str(j) for j in range(0, i)])
        else:
            subject = complete(str(prompt), stop_at=["##", "\nAct", "\nTho", "\nObs"])
        prompt += tee(subject, color=ColorCodes.OKBLUE) + tee("##", color=ColorCodes.OKGREEN)

        if action == "COMMAND":
            result = session_interface.execute_command(subject).output
            executed_commands[i] = subject
            # Add an indicator if the command produced nothing. In interactive mode a
            # single line is treated as "no output" (the example transcript in the
            # prompt shows the echoed command line as the first line of an
            # observation); otherwise the output itself must be blank. Note that
            # `"".split("\n")` has length 1, so a line count can never be 0.
            if is_interactive:
                has_no_output = len(result.split("\n")) == 1
            else:
                has_no_output = not result.strip()
            if has_no_output:
                result += "\n<NO_OUTPUT>"

        elif action == "COMMUNICATE":
            result = input(subject)
            if len(result) == 0:
                result = "<NO_INPUT>"

        elif action == "RESET":
            session_interface.recreate()
            # reapply all the commands of the steps before the target step
            target = int(subject)
            j = 0
            try:
                for j in range(0, target):
                    if executed_commands[j] is not None:
                        session_interface.execute_command(executed_commands[j])

                result = f"Successfully rerun commands up to step {subject}."
            except Exception as e:
                result = f"Failed to reset to step {subject}: {e}. Last executed step: {j}."
                # The container is only partially restored, so the run is aborted;
                # the failure is still recorded as the final observation instead of
                # being silently dropped.
                prompt += tee(f"\nObs {i}: ##", color=ColorCodes.OKGREEN) + tee(result, color=ColorCodes.OKCYAN) + tee("##", color=ColorCodes.OKGREEN)
                break

            # remove all the executed commands from the target step onwards
            executed_commands[target:] = [None] * (max_steps - target)

        elif action == "FINISH":
            correct = input(f"Model response '{subject}'. "
                            "Print 'stop' to finish or reply to the model")
            if correct.lower() == "stop":
                break
            else:
                result = correct

        else:
            # Without this guard an unexpected action would reuse the observation of
            # the previous step (or fail with a NameError on the first step).
            raise ValueError(f"Unexpected action returned by the model: {action!r}")

        prompt += tee(f"\nObs {i}: ##", color=ColorCodes.OKGREEN) + tee(result, color=ColorCodes.OKCYAN) + tee("##", color=ColorCodes.OKGREEN)

Project path: /Users/Evgeny.Grigorenko/Workspaces/projects/ideformer-plugin
[92mYou are an agent operation in the environment of the PyCharm IDE. On request, you need to find the answer to the question and write it down using the FINISH action. You have the whole power of the non-interactive linux command execution via the COMMAND action to explore the code base and operate it to find the answer, you can communicate with the user via COMMUNICATE action, and restart the state to to the specified step use RESET action. You start at the project root and allowed to roam only around the project folder and user $HOME directory.
    
    You are expected to execute according to the ReAct pattern and structure your logic like that (use example bellow for additional guidance):
    * Tho <N>: ##<Thought>## -- one line textual description of the current state and next actions. Its main intention is to help you to keep the context, history and future actions in mind. Thought block is required aft