Local II

KillianLucas committed May 31, 2024
1 parent b917ddd commit e2ff17a
Showing 17 changed files with 2,978 additions and 2,800 deletions.
162 changes: 162 additions & 0 deletions interpreter/core/archived_server.py
@@ -0,0 +1,162 @@
import asyncio
import json
from typing import AsyncGenerator

from .utils.lazy_import import lazy_import

uvicorn = lazy_import("uvicorn")
fastapi = lazy_import("fastapi")


def server(interpreter, host="0.0.0.0", port=8000):
FastAPI, Request, Response, WebSocket = (
fastapi.FastAPI,
fastapi.Request,
fastapi.Response,
fastapi.WebSocket,
)
    PlainTextResponse = fastapi.responses.PlainTextResponse
    StreamingResponse = fastapi.responses.StreamingResponse

app = FastAPI()

@app.post("/chat")
async def stream_endpoint(request: Request) -> Response:
        async def event_stream() -> AsyncGenerator[str, None]:
            data = await request.json()
            for response in interpreter.chat(message=data["message"], stream=True):
                # Chunks are LMC dicts, so serialize them before streaming them as text.
                yield json.dumps(response) + "\n"

        return StreamingResponse(event_stream(), media_type="text/event-stream")

# Post endpoint
# @app.post("/iv0", response_class=PlainTextResponse)
# async def i_post_endpoint(request: Request):
# message = await request.body()
# message = message.decode("utf-8") # Convert bytes to string

# async def event_stream() -> Generator[str, None, None]:
# for response in interpreter.chat(
# message=message, stream=True, display=False
# ):
# if (
# response.get("type") == "message"
# and response["role"] == "assistant"
# and "content" in response
# ):
# yield response["content"] + "\n"
# if (
# response.get("type") == "message"
# and response["role"] == "assistant"
# and response.get("end") == True
# ):
# yield " \n"

# return StreamingResponse(event_stream(), media_type="text/plain")

@app.get("/test")
async def test_ui():
return PlainTextResponse(
"""
<!DOCTYPE html>
<html>
<head>
<title>Chat</title>
</head>
<body>
<form action="" onsubmit="sendMessage(event)">
<textarea id="messageInput" rows="10" cols="50" autocomplete="off"></textarea>
<button>Send</button>
</form>
<div id="messages"></div>
<script>
var ws = new WebSocket("ws://localhost:8000/");
var lastMessageElement = null;
ws.onmessage = function(event) {
if (lastMessageElement == null) {
lastMessageElement = document.createElement('p');
document.getElementById('messages').appendChild(lastMessageElement);
}
lastMessageElement.innerHTML += event.data;
};
function sendMessage(event) {
event.preventDefault();
var input = document.getElementById("messageInput");
var message = input.value;
if (message.startsWith('{') && message.endsWith('}')) {
message = JSON.stringify(JSON.parse(message));
}
ws.send(message);
var userMessageElement = document.createElement('p');
userMessageElement.innerHTML = '<b>' + input.value + '</b><br>';
document.getElementById('messages').appendChild(userMessageElement);
lastMessageElement = document.createElement('p');
document.getElementById('messages').appendChild(lastMessageElement);
input.value = '';
}
</script>
</body>
</html>
""",
media_type="text/html",
)

@app.websocket("/")
async def i_test(websocket: WebSocket):
await websocket.accept()
while True:
data = await websocket.receive_text()
while data.strip().lower() != "stop": # Stop command
task = asyncio.create_task(websocket.receive_text())

# This would be terrible for production. Just for testing.
try:
data_dict = json.loads(data)
if set(data_dict.keys()) == {"role", "content", "type"} or set(
data_dict.keys()
) == {"role", "content", "type", "format"}:
data = data_dict
except json.JSONDecodeError:
pass

for response in interpreter.chat(
message=data, stream=True, display=False
):
if task.done():
data = task.result() # Get the new message
break # Break the loop and start processing the new message
# Send out assistant message chunks
if (
response.get("type") == "message"
and response["role"] == "assistant"
and "content" in response
):
await websocket.send_text(response["content"])
await asyncio.sleep(0.01) # Add a small delay
if (
response.get("type") == "message"
and response["role"] == "assistant"
and response.get("end") == True
):
await websocket.send_text("\n")
await asyncio.sleep(0.01) # Add a small delay
if not task.done():
data = (
await task
) # Wait for the next message if it hasn't arrived yet

print(
"\nOpening a simple `interpreter.chat(data)` POST endpoint at http://localhost:8000/chat."
)
print(
"Opening an `i.protocol` compatible WebSocket endpoint at http://localhost:8000/."
)
print("\nVisit http://localhost:8000/test to test the WebSocket endpoint.\n")

import socket

hostname = socket.gethostname()
local_ip = socket.gethostbyname(hostname)
local_url = f"http://{local_ip}:8000"
print(f"Local URL: {local_url}\n")

uvicorn.run(app, host=host, port=port)
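For orientation, a hedged client-side sketch of driving this archived server once it is running on port 8000. The module path and the requests dependency are assumptions, and the exact shape of each streamed chunk depends on interpreter.chat:

# Server side (assumed import path for this commit):
#   from interpreter import interpreter
#   from interpreter.core.archived_server import server
#   server(interpreter)  # defaults to host 0.0.0.0, port 8000

# Client side: POST a message to /chat and print whatever streams back.
import requests

payload = {"message": "What operating system am I on?"}
with requests.post("http://localhost:8000/chat", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if line:
            print(line)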
16 changes: 16 additions & 0 deletions interpreter/core/computer/ai/ai.py
@@ -117,6 +117,22 @@ class Ai:
def __init__(self, computer):
self.computer = computer

def chat(self, text):
old_messages = self.computer.interpreter.llm.interpreter.messages
old_system_message = self.computer.interpreter.llm.interpreter.system_message
try:
self.computer.interpreter.llm.interpreter.system_message = (
"You are an AI assistant."
)
self.computer.interpreter.llm.interpreter.messages = []
response = self.computer.interpreter.llm.interpreter.chat(text)
finally:
self.computer.interpreter.llm.interpreter.messages = old_messages
self.computer.interpreter.llm.interpreter.system_message = (
old_system_message
)
return response[-1].get("content")

def query(self, text, query, custom_reduce_query=None):
if custom_reduce_query == None:
custom_reduce_query = query
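The new Ai.chat helper swaps in an empty history and a generic system message, runs a one-off chat, and then restores both in the finally block, so the side question never leaks into the main conversation. A minimal usage sketch, assuming the computer module is reachable as interpreter.computer and an LLM is configured:

from interpreter import interpreter

# One-off question routed through the computer module; Ai.chat restores
# llm.interpreter.messages and system_message when it returns.
answer = interpreter.computer.ai.chat("In one sentence, what is OCR?")
print(answer)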
43 changes: 24 additions & 19 deletions interpreter/core/computer/display/display.py
@@ -94,26 +94,31 @@ def screenshot(
:param screen: specify which display; 0 for primary and 1 and above for secondary.
:param combine_screens: If True, a collage of all display screens will be returned. Otherwise, a list of display screens will be returned.
"""
if not self.computer.emit_images and force_image == False:
screenshot = self.screenshot(show=False, force_image=True)

description = self.computer.vision.query(pil_image=screenshot)
print("A DESCRIPTION OF WHAT'S ON THE SCREEN: " + description)

if self.computer.max_output > 600:
print("ALL OF THE TEXT ON THE SCREEN: ")
text = self.get_text_as_list_of_lists(screenshot=screenshot)
pp = pprint.PrettyPrinter(indent=4)
pretty_text = pp.pformat(text) # language models like it pretty!
pretty_text = format_to_recipient(pretty_text, "assistant")
print(pretty_text)
print(
format_to_recipient(
"To recieve the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
"assistant",
)
)
return screenshot # Still return a PIL image
# Since Local II, all images sent to local models will be rendered to text with moondream and pytesseract.
# So we don't need to do this here— we can just emit images.
# We should probably remove self.computer.emit_images for this reason.

# if not self.computer.emit_images and force_image == False:
# screenshot = self.screenshot(show=False, force_image=True)

# description = self.computer.vision.query(pil_image=screenshot)
# print("A DESCRIPTION OF WHAT'S ON THE SCREEN: " + description)

# if self.computer.max_output > 600:
# print("ALL OF THE TEXT ON THE SCREEN: ")
# text = self.get_text_as_list_of_lists(screenshot=screenshot)
# pp = pprint.PrettyPrinter(indent=4)
# pretty_text = pp.pformat(text) # language models like it pretty!
# pretty_text = format_to_recipient(pretty_text, "assistant")
# print(pretty_text)
# print(
# format_to_recipient(
        #             "To receive the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
# "assistant",
# )
# )
# return screenshot # Still return a PIL image

if quadrant == None:
if active_app_only:
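With the text-rendering branch commented out, screenshot() now just returns a PIL image and leaves OCR and image description to the vision pipeline named in the comment above. A hedged sketch, assuming show and force_image keep the meanings they have in this hunk:

from interpreter import interpreter

# Capture the primary display; no pytesseract/Moondream pass happens here anymore.
img = interpreter.computer.display.screenshot(show=False, force_image=True)
print(img.size)  # PIL reports (width, height)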
11 changes: 1 addition & 10 deletions interpreter/core/computer/utils/computer_vision.py
@@ -11,16 +11,7 @@


def pytesseract_get_text(img):
# Convert PIL Image to NumPy array
img_array = np.array(img)

# Convert the image to grayscale
gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)

# Use pytesseract to get the text from the image
text = pytesseract.image_to_string(gray)

return text
return pytesseract.image_to_string(img)


def pytesseract_get_text_bounding_boxes(img):
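The shorter helper relies on pytesseract.image_to_string accepting a PIL image (or a filename string) directly, which makes the old NumPy/OpenCV grayscale conversion unnecessary. A minimal sketch, assuming the Tesseract binary plus the pytesseract and Pillow packages are installed; screenshot.png is a placeholder path:

import pytesseract
from PIL import Image

# Hand the PIL image straight to pytesseract; no cv2/NumPy conversion required.
img = Image.open("screenshot.png")  # hypothetical file
print(pytesseract.image_to_string(img))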
92 changes: 83 additions & 9 deletions interpreter/core/computer/vision/vision.py
@@ -1,9 +1,13 @@
import base64
import contextlib
import io
import os
import tempfile

from PIL import Image

from ...utils.lazy_import import lazy_import
from ..utils.computer_vision import pytesseract_get_text

# transformers = lazy_import("transformers") # Doesn't work for some reason! We import it later.

@@ -17,21 +17,79 @@ def __init__(self, computer):
def load(self):
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!

print(
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
)
print(
"Alternativley, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

if self.computer.debug:
print(
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
)
print(
"Alternativley, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
)
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
print("loading model")

self.model = transformers.AutoModelForCausalLM.from_pretrained(
model_id, trust_remote_code=True, revision=revision
)
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
model_id, revision=revision
)

def ocr(
self,
base_64=None,
path=None,
lmc=None,
pil_image=None,
):
"""
Gets OCR of image.
"""

if lmc:
if "base64" in lmc["format"]:
# # Extract the extension from the format, default to 'png' if not specified
# if "." in lmc["format"]:
# extension = lmc["format"].split(".")[-1]
# else:
# extension = "png"
# Save the base64 content as a temporary file
img_data = base64.b64decode(lmc["content"])
with tempfile.NamedTemporaryFile(
delete=False, suffix=".png"
) as temp_file:
temp_file.write(img_data)
temp_file_path = temp_file.name

# Set path to the path of the temporary file
path = temp_file_path

elif lmc["format"] == "path":
                # Use the provided path directly
path = lmc["content"]
elif base_64:
# Save the base64 content as a temporary file
img_data = base64.b64decode(base_64)
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
temp_file.write(img_data)
temp_file_path = temp_file.name

# Set path to the path of the temporary file
path = temp_file_path
elif path:
pass
elif pil_image:
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
pil_image.save(temp_file, format="PNG")
temp_file_path = temp_file.name

# Set path to the path of the temporary file
path = temp_file_path

return pytesseract_get_text(path)

def query(
self,
query="Describe this image.",
Expand All @@ -45,7 +107,16 @@ def query(
"""

if self.model == None and self.tokenizer == None:
self.load()
try:
with contextlib.redirect_stdout(
open(os.devnull, "w")
), contextlib.redirect_stderr(open(os.devnull, "w")):
self.load()
except ImportError:
self.computer.interpreter.display_markdown_message(
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
)
return ""

if lmc:
if "base64" in lmc["format"]:
@@ -71,5 +71,8 @@ def query(
elif pil_image:
img = pil_image

enc_image = self.model.encode_image(img)
return self.model.answer_question(enc_image, query, self.tokenizer)
with contextlib.redirect_stdout(open(os.devnull, "w")):
enc_image = self.model.encode_image(img)
answer = self.model.answer_question(enc_image, query, self.tokenizer)

return answer
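A hedged usage sketch for the new ocr helper and the Moondream-backed query. It assumes the local extras are installed (pip install 'open-interpreter[local]', as the fallback message above suggests), that the vision object is reachable as interpreter.computer.vision, that query() accepts a path keyword the way ocr() does, and that screenshot.png is a placeholder path:

from interpreter import interpreter

vision = interpreter.computer.vision

# OCR via pytesseract; a base64 string, PIL image, or LMC message also works,
# since ocr() writes those out to a temporary PNG first.
print(vision.ocr(path="screenshot.png"))

# Description via Moondream; the model is downloaded and loaded on first use.
print(vision.query(query="Describe this image.", path="screenshot.png"))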