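"""chat.py: assemble the chat context for one agent cycle within the token budget and query the LLM."""
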
from __future__ import annotations

import time
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from autogpt.agent.agent import Agent

from autogpt.config import Config
from autogpt.llm.api_manager import ApiManager
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.utils import count_message_tokens, create_chat_completion
from autogpt.log_cycle.log_cycle import CURRENT_CONTEXT_FILE_NAME
from autogpt.logs import logger


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    config: Config,
    agent: Agent,
    system_prompt: str,
    user_input: str,
    token_limit: int,
    model: str | None = None,
):
    """
    Interact with the OpenAI API, sending the prompt, user input,
    message history, and permanent memory.

    Args:
        config (Config): The config to use.
        agent (Agent): The agent to use.
        system_prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        token_limit (int): The maximum number of tokens allowed in the API call.
        model (str, optional): The model to use. If None, config.fast_llm_model
            is used. Defaults to None.

    Returns:
        str: The AI's response.
    """
    if model is None:
        model = config.fast_llm_model

    # Reserve 1000 tokens for the response
    logger.debug(f"Token limit: {token_limit}")
    send_token_limit = token_limit - 1000
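    # History, summary, and plugin messages below are fitted within
    # send_token_limit; the reserved tokens come back to the model as
    # response headroom via tokens_remaining at the end.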

    # if len(agent.history) == 0:
    #     relevant_memory = ""
    # else:
    #     recent_history = agent.history[-5:]
    #     shuffle(recent_history)
    #     relevant_memories = agent.memory.get_relevant(
    #         str(recent_history), 5
    #     )
    #     if relevant_memories:
    #         shuffle(relevant_memories)
    #     relevant_memory = str(relevant_memories)

    # logger.debug(f"Memory Stats: {agent.memory.get_stats()}")
    relevant_memory = []

    message_sequence = ChatSequence.for_model(
        model,
        [
            Message("system", system_prompt),
            Message("system", f"The current time and date is {time.strftime('%c')}"),
            # Message(
            #     "system",
            #     f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
            # ),
        ],
    )

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(agent.history) - 1
    insertion_index = len(message_sequence)
    # Count the currently used tokens
    current_tokens_used = message_sequence.token_length

    # while current_tokens_used > 2500:
    #     # remove memories until we are under 2500 tokens
    #     relevant_memory = relevant_memory[:-1]
    #     (
    #         next_message_to_add_index,
    #         current_tokens_used,
    #         insertion_index,
    #         current_context,
    #     ) = generate_context(
    #         prompt, relevant_memory, agent.history, model
    #     )

    # Account for user input (appended later)
    user_input_msg = Message("user", user_input)
    current_tokens_used += count_message_tokens([user_input_msg], model)

    current_tokens_used += 500  # Reserve space for new_summary_message
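    # (this reservation is provisional: the summary's real token count replaces
    # the flat 500 once the summary message is built below)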

    # Add Messages until the token limit is reached or there are no more messages to add.
    for cycle in reversed(list(agent.history.per_cycle())):
        messages_to_add = [msg for msg in cycle if msg is not None]
        tokens_to_add = count_message_tokens(messages_to_add, model)
        if current_tokens_used + tokens_to_add > send_token_limit:
            break

        # Add the most recent message to the start of the chain,
        # after the system prompts.
        message_sequence.insert(insertion_index, *messages_to_add)
        current_tokens_used += tokens_to_add
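
    # Note: cycles are visited newest-first but always inserted at the same
    # insertion_index, so each older cycle lands above the newer ones already
    # added and the surviving history reads chronologically after the prompts.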

    # Update & add summary of trimmed messages
    if len(agent.history) > 0:
        new_summary_message, trimmed_messages = agent.history.trim_messages(
            current_message_chain=list(message_sequence),
        )
        tokens_to_add = count_message_tokens([new_summary_message], model)
        message_sequence.insert(insertion_index, new_summary_message)
        current_tokens_used += tokens_to_add - 500
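        # (subtracting 500 releases the provisional reservation made earlier,
        # leaving only the summary's actual size counted)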

        # FIXME: uncomment when memory is back in use
        # memory_store = get_memory(cfg)
        # for _, ai_msg, result_msg in agent.history.per_cycle(trimmed_messages):
        #     memory_to_add = MemoryItem.from_ai_action(ai_msg, result_msg)
        #     logger.debug(f"Storing the following memory:\n{memory_to_add.dump()}")
        #     memory_store.add(memory_to_add)

    api_manager = ApiManager()
    # inform the AI about its remaining budget (if it has one)
    if api_manager.get_total_budget() > 0.0:
        remaining_budget = api_manager.get_total_budget() - api_manager.get_total_cost()
        if remaining_budget < 0:
            remaining_budget = 0

        budget_message = f"Your remaining API budget is ${remaining_budget:.3f}" + (
            " BUDGET EXCEEDED! SHUT DOWN!\n\n"
            if remaining_budget == 0
            else " Budget very nearly exceeded! Shut down gracefully!\n\n"
            if remaining_budget < 0.005
            else " Budget nearly exceeded. Finish up.\n\n"
            if remaining_budget < 0.01
            else "\n\n"
        )

        logger.debug(budget_message)
        message_sequence.add("system", budget_message)
        current_tokens_used += count_message_tokens([message_sequence[-1]], model)

    # Append user input; its token count was accounted for above
    message_sequence.append(user_input_msg)

    plugin_count = len(config.plugins)
    for i, plugin in enumerate(config.plugins):
        if not plugin.can_handle_on_planning():
            continue
        plugin_response = plugin.on_planning(
            agent.config.prompt_generator, message_sequence.raw()
        )
        if not plugin_response:  # covers both None and ""
            continue
        tokens_to_add = count_message_tokens(
            [Message("system", plugin_response)], model
        )
        if current_tokens_used + tokens_to_add > send_token_limit:
            logger.debug(f"Plugin response too long, skipping: {plugin_response}")
            logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
            break
        message_sequence.add("system", plugin_response)
        current_tokens_used += tokens_to_add  # count the accepted plugin message

    # Calculate remaining tokens
    tokens_remaining = token_limit - current_tokens_used
    # assert tokens_remaining >= 0, (
    #     "Tokens remaining is negative. This should never happen, please submit "
    #     "a bug report at https://www.github.com/Torantulino/Auto-GPT"
    # )

    # Debug print the current context
    logger.debug(f"Token limit: {token_limit}")
    logger.debug(f"Send Token Count: {current_tokens_used}")
    logger.debug(f"Tokens remaining for response: {tokens_remaining}")
    logger.debug("------------ CONTEXT SENT TO AI ---------------")
    for message in message_sequence:
        # Skip printing the prompt
        if message.role == "system" and message.content == system_prompt:
            continue
        logger.debug(f"{message.role.capitalize()}: {message.content}")
        logger.debug("")
    logger.debug("----------- END OF CONTEXT ----------------")

    agent.log_cycle_handler.log_cycle(
        agent.config.ai_name,
        agent.created_at,
        agent.cycle_count,
        message_sequence.raw(),
        CURRENT_CONTEXT_FILE_NAME,
    )

    # TODO: use a model defined elsewhere, so that model can contain
    # temperature and other settings we care about
    assistant_reply = create_chat_completion(
        prompt=message_sequence,
        max_tokens=tokens_remaining,
    )
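    # max_tokens=tokens_remaining gives the completion whatever is left of
    # token_limit after the context was assembled.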

    # Update full message history
    agent.history.append(user_input_msg)
    agent.history.add("assistant", assistant_reply, "ai_response")

    return assistant_reply
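

# A minimal usage sketch (hypothetical wiring; the config and agent objects come
# from Auto-GPT's startup code, and the exact values shown are assumptions):
#
#     reply = chat_with_ai(
#         config=config,
#         agent=agent,
#         system_prompt=agent.system_prompt,
#         user_input="Determine exactly one command to use, and respond "
#                    "using the format specified above:",
#         token_limit=config.fast_token_limit,  # e.g. 4000 for a 4k-context model
#     )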