# 找open-interpreter的prompt

In [1]:
import platform
import getpass
import os

# Facts about the local session that are embedded into the prompt.
username = getpass.getuser()
current_working_directory = os.getcwd()
operating_system = platform.system()

# `info` is the user-info header followed by the instructions that tell the
# model how to get code executed (fenced markdown blocks).
info = "".join((
    f"[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}",
    "\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output.",
))

In [2]:
# Directory that contains system_message.txt.
# NOTE(review): machine-specific absolute path -- point it at your own
# open-interpreter checkout before running.
syspath = '/mnt/data00/yangxiaobo/func_call/open-interpreter/interpreter/'
# Decode explicitly as UTF-8 so the prompt text does not depend on the locale
# of the machine running the notebook.
with open(os.path.join(syspath, 'system_message.txt'), 'r', encoding='utf-8') as f:
    system_message = f.read().strip()

In [3]:
# Keep only the first three lines of the loaded system message, add the
# "only do what the user asks" instruction, then append the session info
# after a blank line.
kept_lines = system_message.split("\n")[:3]
system_message = (
    "\n".join(kept_lines)
    + "\nOnly do what the user asks you to do, then ask what they'd like to do next."
    + "\n\n"
    + info
)

In [4]:
print(system_message)

You are Open Interpreter, a world-class programmer that can complete any goal by executing code.
First, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
When you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.
Only do what the user asks you to do, then ask what they'd like to do next.

[User Info]
Name: yangxiaobo
CWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter
OS: Linux

To run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output.


In [45]:
import tokentrim as tt

# A single-turn conversation holding the user's request.
msg = "Plot APPL and META's normalized stock prices use python"
user_turn = {"role": "user", "content": msg}

# Pass the conversation and the system prompt to tokentrim with a
# 1048-token limit; the result is the message list used from here on.
messages = tt.trim(
    [user_turn],
    system_message=system_message,
    max_tokens=1048,
)
messages

[{'role': 'system',
  'content': "You are Open Interpreter, a world-class programmer that can complete any goal by executing code.\nFirst, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).\nWhen you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.\nOnly do what the user asks you to do, then ask what they'd like to do next.\n\n[User Info]\nName: yangxiaobo\nCWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter\nOS: Linux\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its out

In [46]:
def messages_to_prompt(messages):
    """Render a chat history as one ``<s>[INST] <<SYS>> ...`` prompt string.

    Args:
        messages: List of message dicts. ``messages[0]["content"]`` is used as
            the system prompt. Every following entry is rendered in order
            according to its ``"role"``:

            * ``"user"``      -> ``"{content} [/INST] "``
            * ``"function"``  -> ``"Output: {content} [/INST] "``
            * ``"assistant"`` -> ``"{content} </s><s>[INST] "``

            Entries with any other role are skipped. Entries without a
            ``"role"`` key are treated as assistant messages and are tagged
            with ``"role": "assistant"`` in place (the caller's dicts are
            modified). A missing or ``None`` ``"content"`` is rendered as an
            empty string instead of raising or producing the text ``"None"``.

    Returns:
        The formatted prompt. If the text ends with the opener of a new turn
        (``"<s>[INST] "``), that dangling opener is removed.

    Raises:
        IndexError: If ``messages`` is empty.
        KeyError: If the first message has no ``"content"`` key.
    """
    # The system prompt opens the very first [INST] section.
    system_prompt = messages[0]['content']
    formatted_messages = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"

    # A message can lack a role when the model starts writing code right
    # away; such a message is an assistant message.
    for message in messages:
        if "role" not in message:
            message["role"] = "assistant"

    # Render everything after the system message.
    for item in messages[1:]:
        role = item['role']
        # A placeholder assistant message may not have any content yet.
        content = item.get('content') or ""

        if role == 'user':
            formatted_messages += f"{content} [/INST] "
        elif role == 'function':
            formatted_messages += f"Output: {content} [/INST] "
        elif role == 'assistant':
            formatted_messages += f"{content} </s><s>[INST] "

    # An assistant message at the very end leaves an opened-but-empty turn
    # behind; drop it so the prompt ends right after "</s>".
    turn_opener = "<s>[INST] "
    if formatted_messages.endswith(turn_opener):
        formatted_messages = formatted_messages[:-len(turn_opener)]

    return formatted_messages

# Flatten the conversation into a single prompt string.
prompt = messages_to_prompt(messages)

# Unless the last turn is a function result, seed the model's reply with a
# sentence announcing that it can run code through markdown code blocks.
last_turn_is_function_output = messages[-1]["role"] == "function"
if not last_turn_is_function_output:
    prompt = prompt + "Let's explore this. By the way, I can run code on your machine by writing the code in a markdown code block. This works for shell, javascript, python, and applescript. I'm going to try to do this for your task. Anyway, "

print(prompt)

<s>[INST] <<SYS>>
You are Open Interpreter, a world-class programmer that can complete any goal by executing code.
First, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
When you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.
Only do what the user asks you to do, then ask what they'd like to do next.

[User Info]
Name: yangxiaobo
CWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter
OS: Linux

To run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its output.
<</SYS>>
Plot APPL an

# model output

用源代码中的llama_cpp进行prompt completion，应该形如
```
>>> from llama_cpp import Llama
>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
>>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
>>> print(output)
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
```
llama_cpp可用pip安装
```
pip install llama-cpp-python
```
model可以在 <https://huggingface.co/TheBloke/CodeLlama-34B-Instruct-GGUF/tree/main> 自行下载 `.gguf` 文件，并放在默认目录 `~/.local/share/Open Interpreter/models/` 下。gguf 文件按照后缀分为不同 size 和 quality 的 model。

In [16]:
from llama_cpp import Llama

# Location of the quantized model file on this machine.
model_path = '/mnt/data00/yangxiaobo/.local/share/Open Interpreter/models/codellama-7b-instruct.Q3_K_S.gguf'
# Load the model through llama_cpp with a 2048-token context window.
llm = Llama(
    n_ctx=2048,
    verbose=False,
    model_path=model_path,
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /mnt/data00/yangxiaobo/.local/share/Open Interpreter/models/codellama-7b-instruct.Q3_K_S.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q3_K     [  4096, 32016,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q3_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q3_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q3_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q3_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:       

In [47]:
# 检测model output
resp = llm(
        prompt,
        stream=True,
        temperature=0.001,
        stop=["</s>"]
      )

txt = ''
for chunk in resp:
    txt += chunk['choices'][0]['text']
    print(chunk['choices'][0]['text'], end='')



Let's start with some data. We have APPL and META stock prices from Yahoo Finance. We can use pandas to read the data into a dataframe.
```python
import pandas as pd
df = pd.read_csv('stock_prices.csv')
print(df)
```
This will give us the following output:
```
          Date    Open    High     Low   Close  Adj Close
0   2018-06-01  153.79  154.29  153.7

In [48]:
# chunk块的信息格式
resp = llm(
        prompt,
        stream=True,
        temperature=0.001,
        stop=["</s>"]
      )

txt = ''
for chunk in resp:
    print(chunk)

{'id': 'cmpl-aaef5d47-b483-49d8-8565-81db5b2094cb', 'object': 'text_completion', 'created': 1694159549, 'model': '/mnt/data00/yangxiaobo/.local/share/Open Interpreter/models/codellama-7b-instruct.Q3_K_S.gguf', 'choices': [{'text': '\n', 'index': 0, 'logprobs': None, 'finish_reason': None}]}
{'id': 'cmpl-aaef5d47-b483-49d8-8565-81db5b2094cb', 'object': 'text_completion', 'created': 1694159549, 'model': '/mnt/data00/yangxiaobo/.local/share/Open Interpreter/models/codellama-7b-instruct.Q3_K_S.gguf', 'choices': [{'text': '\n', 'index': 0, 'logprobs': None, 'finish_reason': None}]}
{'id': 'cmpl-aaef5d47-b483-49d8-8565-81db5b2094cb', 'object': 'text_completion', 'created': 1694159549, 'model': '/mnt/data00/yangxiaobo/.local/share/Open Interpreter/models/codellama-7b-instruct.Q3_K_S.gguf', 'choices': [{'text': 'Let', 'index': 0, 'logprobs': None, 'finish_reason': None}]}
{'id': 'cmpl-aaef5d47-b483-49d8-8565-81db5b2094cb', 'object': 'text_completion', 'created': 1694159549, 'model': '/mnt/data

格式化model output

In [49]:
# Add an assistant message that has a role but no content yet; the streamed
# text is merged into it afterwards. Re-running this cell appends another one.
messages.append({"role": "assistant"})
messages

[{'role': 'system',
  'content': "You are Open Interpreter, a world-class programmer that can complete any goal by executing code.\nFirst, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).\nWhen you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.\nOnly do what the user asks you to do, then ask what they'd like to do next.\n\n[User Info]\nName: yangxiaobo\nCWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter\nOS: Linux\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its out

In [50]:
from interpreter.utils import merge_deltas

# Stream the completion again and fold every piece of text into the last
# message with merge_deltas.
resp = llm(prompt, stop=["</s>"], temperature=0.001, stream=True)

# `txt` is reset here but not filled in this cell.
txt = ''
for chunk in resp:
    text_piece = chunk["choices"][0]["text"]
    delta = {"content": text_piece}
    messages[-1] = merge_deltas(messages[-1], delta)

print(messages)

[{'role': 'system', 'content': "You are Open Interpreter, a world-class programmer that can complete any goal by executing code.\nFirst, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).\nWhen you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.\nOnly do what the user asks you to do, then ask what they'd like to do next.\n\n[User Info]\nName: yangxiaobo\nCWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter\nOS: Linux\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its outpu

In [51]:
messages

[{'role': 'system',
  'content': "You are Open Interpreter, a world-class programmer that can complete any goal by executing code.\nFirst, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).\nWhen you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.\nOnly do what the user asks you to do, then ask what they'd like to do next.\n\n[User Info]\nName: yangxiaobo\nCWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter\nOS: Linux\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its out

## 判断是否进行function call用的是markdown的代码框符号

In [53]:
# An odd number of ``` markers in the latest message means the most recent
# fence has not been closed yet.
fence_marker_count = messages[-1]["content"].count("```")
condition = fence_marker_count % 2 == 1
condition

True

以此来为output切分成不同的block，例如下面是刚从一个普通chat block切换进入code block，func call会在code block中执行（ref：interpreter.py, line 499）
```
if condition:
        # We are in a function call.

        # Check if we just entered a function call
        if in_function_call == False:  # init值为false

          # If so, end the last block,
          self.end_active_block()

          # Print newline if it was just a code block or user message
          # (this just looks nice)
          last_role = self.messages[-2]["role"]
          if last_role == "user" or last_role == "function":
            print()

          # then create a new code block
          self.active_block = CodeBlock()

        # Remember we're in a function_call
        in_function_call = True
```
open interpreter定义了codeblock和messageblock两种分划block

## 处理code

In [54]:
# Text of the latest message.
content = messages[-1]["content"]

# Split on the fence marker; the final piece is whatever follows the most
# recent ``` in the text.
blocks = content.split("```")
current_code_block = blocks[len(blocks) - 1]

# Break that piece into lines (the first one is read as the language tag).
lines = current_code_block.split("\n")

In [55]:
# Work out the language of the current block.
if content.strip() == "```":
    # Only the opening fence has been produced, no language yet.
    language = None
elif lines[0] != "":
    # The text right after the fence is taken as the language tag.
    language = lines[0].strip()
else:
    # Nothing follows the fence on its line: fall back to python.
    language = "python"

# The code is every line after the language line, with surrounding
# backticks, spaces and newlines stripped.
code = '\n'.join(lines[1:]).strip("` \n")

# The language is only included once it is known (non-empty).
arguments = {"code": code}
if language:
    arguments["language"] = language

arguments

{'code': 'Date    Open    High     Low   Close  Adj Close\n0   2018-06-01  153.79  154.29  153.7',
 'language': 'python'}

In [56]:
# Make sure the latest message has a "function_call" dict, then store the
# parsed arguments in it.
messages[-1].setdefault("function_call", {})["parsed_arguments"] = arguments
messages

[{'role': 'system',
  'content': "You are Open Interpreter, a world-class programmer that can complete any goal by executing code.\nFirst, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).\nWhen you send a message containing code to run_code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. You have full access to control their computer to help them. Code entered into run_code will be executed **in the users local environment**.\nOnly do what the user asks you to do, then ask what they'd like to do next.\n\n[User Info]\nName: yangxiaobo\nCWD: /mnt/data00/yangxiaobo/llarb/eval/open_interpreter\nOS: Linux\n\nTo run code, write a fenced code block (i.e ```python or ```shell) in markdown. When you close it with ```, it will be run. You'll then be given its out

## 代码的再处理与执行
有一个indentation处理？(code_interpreter.py line 176)
```
if self.language == "python":
    # This lets us stop execution when error happens (which is not default -i behavior)
    # And solves a bunch of indentation problems-- if everything's indented, -i treats it as one block
    code = wrap_in_try_except(code)
```
这里的code就是messages[-1]["function_call"]["parsed_arguments"]中的code（即self.active_block.code，也即CodeBlock实例的self.code，由update_from_message更新）

In [57]:
code = messages[-1]["function_call"]["parsed_arguments"]["code"]
code

'Date    Open    High     Low   Close  Adj Close\n0   2018-06-01  153.79  154.29  153.7'

In [58]:
import ast

def wrap_in_try_except(code):
    """Wrap Python source in one ``try``/``except Exception`` block.

    The returned source imports ``traceback`` as the first statement of the
    ``try`` body, runs the original statements after it and, if any of them
    raises an ``Exception``, prints the traceback via
    ``traceback.print_exc()``.

    Args:
        code: Python source text to wrap.

    Returns:
        The wrapped source, regenerated from the AST with ``ast.unparse``
        (so comments and the original formatting are not preserved).

    Raises:
        SyntaxError: If ``code`` is not valid Python. The source is parsed
            exactly as given, so the reported line numbers refer to ``code``
            itself.
    """
    # Parse the input code into an AST. Nothing is prepended to the text, so
    # a SyntaxError points at the right line of the caller's code.
    parsed_code = ast.parse(code)

    # `import traceback`, built as a node and placed first in the try body.
    import_traceback = ast.Import(names=[ast.alias(name="traceback", asname=None)])

    # Wrap the entire code's AST in a single try-except block
    try_except = ast.Try(
        body=[import_traceback] + parsed_code.body,
        handlers=[
            ast.ExceptHandler(
                type=ast.Name(id="Exception", ctx=ast.Load()),
                name=None,
                body=[
                    # traceback.print_exc()
                    ast.Expr(
                        value=ast.Call(
                            func=ast.Attribute(value=ast.Name(id="traceback", ctx=ast.Load()), attr="print_exc", ctx=ast.Load()),
                            args=[],
                            keywords=[]
                        )
                    ),
                ]
            )
        ],
        orelse=[],
        finalbody=[]
    )

    # Assign the try-except block as the new body
    parsed_code.body = [try_except]

    # Convert the modified AST back to source code
    return ast.unparse(parsed_code)

code = wrap_in_try_except(code)

SyntaxError: leading zeros in decimal integer literals are not permitted; use an 0o prefix for octal integers (<unknown>, line 3)

报错是因为model的output并不好，output卡在一个并非是code的block停止了：



Let's start with some data. We have APPL and META stock prices from Yahoo Finance. We can use pandas to read the data into a dataframe.
```python
import pandas as pd
df = pd.read_csv('stock_prices.csv')
print(df)
```
This will give us the following output:
```
          Date    Open    High     Low   Close  Adj Close
0   2018-06-01  153.79  154.29  153.7


In [85]:
# 为了进行后续测试我们手动让code变正常
content = """

Let's start with some data. We have APPL and META stock prices from Yahoo Finance. We can use pandas to read the data into a dataframe.
```python
import pandas as pd
df = pd.read_csv('stock_prices.csv')
print(df)"""
blocks = content.split("```")
current_code_block = blocks[-1]
lines = current_code_block.split("\n")
if content.strip() == "```": # Hasn't outputted a language yet
    language = None
else:
    language = lines[0].strip() if lines[0] != "" else "python"

# Join all lines except for the language line
code = '\n'.join(lines[1:]).strip("` \n")

arguments = {"code": code}
if language: # We only add this if we have it-- the second we have it, an interpreter gets fired up (I think? maybe I'm wrong)
    arguments["language"] = language

if "function_call" not in messages[-1]:
    messages[-1]["function_call"] = {}
    
messages[-1]["function_call"]["parsed_arguments"] = arguments 
code = messages[-1]["function_call"]["parsed_arguments"]["code"]
code

"import pandas as pd\ndf = pd.read_csv('stock_prices.csv')\nprint(df)"

In [86]:
print(code)

import pandas as pd
df = pd.read_csv('stock_prices.csv')
print(df)


In [87]:
code = wrap_in_try_except(code)

In [88]:
print(code)

try:
    import traceback
    import pandas as pd
    df = pd.read_csv('stock_prices.csv')
    print(df)
except Exception:
    traceback.print_exc()


In [73]:
# 测试上述code文本的执行
try:
    import traceback
    import pandas as pd
    df = pd.read_csv('stock_prices.csv')
    print(df)
except Exception:
    traceback.print_exc()

Traceback (most recent call last):
  File "/tmp/ipykernel_3725297/3393160823.py", line 4, in <module>
    df = pd.read_csv('stock_prices.csv')
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/util/_decorators.py", line 211, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/util/_decorators.py", line 331, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/io/parsers/readers.py", line 950, in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/io/parsers/readers.py", line 605, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
   

In [90]:
# 源代码的另一个处理：去掉空白行
# Remove any whitespace lines, as this will break indented blocks
# (are we sure about this? test this)
code_lines = code.split("\n")
code_lines = [c for c in code_lines if c.strip() != ""]
code = "\n".join(code_lines)

print(code)

try:
    import traceback
    import pandas as pd
    df = pd.read_csv('stock_prices.csv')
    print(df)
except Exception:
    traceback.print_exc()


## 执行代码
以下截取自code_interpreter.py（class CodeInterpreter中run方法）的重新整合

In [106]:
import subprocess
import sys

# Command line of the child interpreter: interactive (-i), no startup banner
# (-q), unbuffered (-u). `start_cmd` is kept as the readable form; the
# argument list is built explicitly because splitting the string on
# whitespace would break an interpreter path that contains spaces.
start_cmd = sys.executable + " -i -q -u"
start_args = [sys.executable, "-i", "-q", "-u"]
proc = subprocess.Popen(start_args,
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True,
                        bufsize=0)

# Hand the code to communicate() instead of writing to stdin first:
# communicate() feeds stdin while draining stdout/stderr, so large inputs or
# outputs cannot fill a pipe and block. It then closes stdin and waits for
# the interpreter to exit.
out, err = proc.communicate(input=code + "\n")
print('out:\n', out,'\n', 'err:\n',err)

out:
  
 err:
 >>> ... ... ... ... ... ... ... 
Traceback (most recent call last):
  File "<stdin>", line 4, in <module>
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/util/_decorators.py", line 211, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/util/_decorators.py", line 331, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/io/parsers/readers.py", line 950, in read_csv
    return _read(filepath_or_buffer, kwds)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/data00/yangxiaobo/anaconda3/envs/funccall/lib/python3.11/site-packages/pandas/io/parsers/readers.py", line 605, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/m

而human-eval的解释器更简单，直接用内置函数exec执行