forked from abi/screenshot-to-code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
llm.py
301 lines (244 loc) · 9.18 KB
/
llm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
from enum import Enum
from typing import Any, Awaitable, Callable, List, cast
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletionMessageParam, ChatCompletionChunk
from config import IS_DEBUG_ENABLED
from debug.DebugFileWriter import DebugFileWriter
import google.generativeai as genai
from utils import pprint_prompt
# Actual model versions that are passed to the LLMs and stored in our logs
class Llm(Enum):
GPT_4_VISION = "gpt-4-vision-preview"
GPT_4_TURBO_2024_04_09 = "gpt-4-turbo-2024-04-09"
GPT_4O_2024_05_13 = "gpt-4o-2024-05-13"
CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
CLAUDE_3_OPUS = "claude-3-opus-20240229"
CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
GEMINI = "gemini-1.5-pro-latest"
# Will throw errors if you send a garbage string
def convert_frontend_str_to_llm(frontend_str: str) -> Llm:
if frontend_str == "gpt_4_vision":
return Llm.GPT_4_VISION
elif frontend_str == "claude_3_sonnet":
return Llm.CLAUDE_3_SONNET
else:
return Llm(frontend_str)
async def stream_openai_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
base_url: str | None,
callback: Callable[[str], Awaitable[None]],
model: Llm,
) -> str:
client = AsyncOpenAI(api_key=api_key, base_url=base_url)
# Base parameters
params = {
"model": model.value,
"messages": messages,
"stream": True,
"timeout": 600,
"temperature": 0.0,
}
# Add 'max_tokens' only if the model is a GPT4 vision or Turbo model
if (
model == Llm.GPT_4_VISION
or model == Llm.GPT_4_TURBO_2024_04_09
or model == Llm.GPT_4O_2024_05_13
):
params["max_tokens"] = 4096
stream = await client.chat.completions.create(**params) # type: ignore
full_response = ""
async for chunk in stream: # type: ignore
assert isinstance(chunk, ChatCompletionChunk)
content = chunk.choices[0].delta.content or ""
full_response += content
await callback(content)
await client.close()
return full_response
# TODO: Have a seperate function that translates OpenAI messages to Claude messages
async def stream_claude_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
) -> str:
client = AsyncAnthropic(api_key=api_key)
# Base parameters
model = Llm.CLAUDE_3_SONNET
max_tokens = 4096
temperature = 0.0
# Translate OpenAI messages to Claude messages
system_prompt = cast(str, messages[0].get("content"))
claude_messages = [dict(message) for message in messages[1:]]
for message in claude_messages:
if not isinstance(message["content"], list):
continue
for content in message["content"]: # type: ignore
if content["type"] == "image_url":
content["type"] = "image"
# Extract base64 data and media type from data URL
# Example base64 data URL: data:image/png;base64,iVBOR...
image_data_url = cast(str, content["image_url"]["url"])
media_type = image_data_url.split(";")[0].split(":")[1]
base64_data = image_data_url.split(",")[1]
# Remove OpenAI parameter
del content["image_url"]
content["source"] = {
"type": "base64",
"media_type": media_type,
"data": base64_data,
}
# Stream Claude response
async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=claude_messages, # type: ignore
) as stream:
async for text in stream.text_stream:
await callback(text)
# Return final message
response = await stream.get_final_message()
# Close the Anthropic client
await client.close()
return response.content[0].text
async def stream_claude_response_native(
system_prompt: str,
messages: list[Any],
api_key: str,
callback: Callable[[str], Awaitable[None]],
include_thinking: bool = False,
model: Llm = Llm.CLAUDE_3_OPUS,
) -> str:
client = AsyncAnthropic(api_key=api_key)
# Base model parameters
max_tokens = 4096
temperature = 0.0
# Multi-pass flow
current_pass_num = 1
max_passes = 2
prefix = "<thinking>"
response = None
# For debugging
full_stream = ""
debug_file_writer = DebugFileWriter()
while current_pass_num <= max_passes:
current_pass_num += 1
# Set up message depending on whether we have a <thinking> prefix
messages_to_send = (
messages + [{"role": "assistant", "content": prefix}]
if include_thinking
else messages
)
pprint_prompt(messages_to_send)
async with client.messages.stream(
model=model.value,
max_tokens=max_tokens,
temperature=temperature,
system=system_prompt,
messages=messages_to_send, # type: ignore
) as stream:
async for text in stream.text_stream:
print(text, end="", flush=True)
full_stream += text
await callback(text)
response = await stream.get_final_message()
response_text = response.content[0].text
# Write each pass's code to .html file and thinking to .txt file
if IS_DEBUG_ENABLED:
debug_file_writer.write_to_file(
f"pass_{current_pass_num - 1}.html",
debug_file_writer.extract_html_content(response_text),
)
debug_file_writer.write_to_file(
f"thinking_pass_{current_pass_num - 1}.txt",
response_text.split("</thinking>")[0],
)
# Set up messages array for next pass
messages += [
{"role": "assistant", "content": str(prefix) + response.content[0].text},
{
"role": "user",
"content": "You've done a good job with a first draft. Improve this further based on the original instructions so that the app is fully functional and looks like the original video of the app we're trying to replicate.",
},
]
print(
f"Token usage: Input Tokens: {response.usage.input_tokens}, Output Tokens: {response.usage.output_tokens}"
)
# Close the Anthropic client
await client.close()
if IS_DEBUG_ENABLED:
debug_file_writer.write_to_file("full_stream.txt", full_stream)
if not response:
raise Exception("No HTML response found in AI response")
else:
return response.content[0].text
async def stream_gemini_response(
messages: List[ChatCompletionMessageParam],
api_key: str,
callback: Callable[[str], Awaitable[None]],
) -> str:
"""
This function streams the Gemini response by generating content based on the given messages.
Parameters:
messages (List[ChatCompletionMessageParam]): A list of chat completion messages.
api_key (str): The API key for the Gemini model.
callback (Callable[[str], Awaitable[None]]): A callback function to handle the generated content.
Returns:
str: The generated response text.
"""
genai.configure(api_key=api_key)
generation_config = genai.GenerationConfig(
temperature=0.0
)
model = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
generation_config=generation_config
)
contents = parse_openai_to_gemini_prompt(messages)
response = model.generate_content(
contents=contents,
stream=True,
)
for chunk in response:
content = chunk.text or ""
await callback(content)
if not response:
raise Exception("No HTML response found in AI response")
else:
return response.text
def parse_openai_to_gemini_prompt(prompts):
"""
Parses the OpenAI prompt into the Gemini format.
Args:
prompts (list): A list of prompts in the OpenAI format.
Returns:
list: A list of messages in the Gemini format.
"""
messages = []
for prompt in prompts:
message = {}
message['role'] = prompt['role']
if prompt['role'] == 'system':
message['role'] = 'user'
if prompt['role'] == 'assistant':
message['role'] = 'model'
message['parts'] = []
content = prompt['content']
if isinstance(content, list):
for content in prompt['content']:
part = {}
if content['type'] == 'image_url':
base64 = content['image_url']['url']
part['inline_data'] = {
'data': base64.split(",")[1],
'mime_type': base64.split(";")[0].split(":")[1]
}
elif content['type'] == 'text':
part['text'] = content['text']
message['parts'].append(part)
else:
message['parts'] = [content]
messages.append(message)
return messages