diff --git a/DAIA.py b/DAIA.py index 74544ef..2f538a8 100644 --- a/DAIA.py +++ b/DAIA.py @@ -1,4 +1,4 @@ -# DAIA - Digital Artificial Inteligence Agent +# DAIA - Digital Artificial Intelligence Agent # Copyright (C) 2023 Envedity # # This program is free software: you can redistribute it and/or modify @@ -14,7 +14,8 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from DAIA_GPT4V.run import run +from Versions.DAIA_GPT4V_PreProgrammed.run import run +from Versions.DAIA_GPT4V_FreeThink.run import run_f from config import openai_api_key @@ -37,7 +38,7 @@ def main(key: str = None) -> None: try: option = int( input( - "\nOptions\n[1] DAIA_GPT-4-with-Vision\n[2] DAIA_Continues\n\nSelect Option: " + "\nVersions:\n[1] DAIA_GPT-4-with-Vision-PreProgramed (Pre-Program DAIA's thinking, based on human thinking)\n[2] DAIA_GPT-4-with-Vision-FreeThink (Let the DAIA think on its own)\n\nSelect DAIA Version: " ) ) @@ -50,7 +51,7 @@ def main(key: str = None) -> None: return run(api_key=key) case 2: - return print("Currently Unavaiable.") + return run_f(api_key=key) case _: return print("Invalid Option.") diff --git a/Design/.$DAIA (GPT Vision).drawio.bkp b/Design/.$DAIA (GPT Vision).drawio.bkp deleted file mode 100644 index f84192b..0000000 --- a/Design/.$DAIA (GPT Vision).drawio.bkp +++ /dev/nulldiff --git a/LICENCE b/LICENSE similarity index 100% rename from LICENCE rename to LICENSE diff --git a/README.md b/README.md index 0c550f4..d68ae04 100644 --- a/README.md +++ b/README.md @@ -6,23 +6,25 @@ DAIA is a powerful Digital Artificial Intelligence Agent that enables intelligen ## Table of Contents 📜 -1. [Introduction](#1-introduction-) -2. [Features](#2-features-) -3. [Usage](#3-usage-) -4. [Contribution](#4-contribution-) -5. [License](#5-license-) +1. [Introduction](#1-introduction-🚀) +2. [Overview](#2-overview-📖) +3. [Features](#3-features-🌟) +4. [Versions](#4-versions-✨️) +5. [Usage](#5-usage-🤝) +6. 
[Contribution](#6-contribution-🙌) +7. [License](#7-license-📄) ## 1. Introduction 🚀 -DAIA is a cutting-edge AI agent designed to enhance your productivity by intelligently interacting with your computer and completing big goals. With DAIA, you can complete goals, automate tasks, gather information, and perform various operations seamlessly, just like you would do yourself. +DAIA is a cutting-edge visual AI agent designed to enhance your productivity by intelligently interacting with your computer (or other computers) and completing big goals. With DAIA, you can complete goals, automate tasks, gather information, and perform various operations seamlessly, just like you would do yourself. -The main difference with the DAIA compared to other AI Agents is that it interacts with your computer through a vision system (GPT-4V) and task completion system allowing it to be capable of doing many more tasks and goals compared to other AI Agents that use the Terminal or CMD for interaction with the computer. -Furthermore, the DAIA will be built with a built-in memory, self-evaluating and optimizing system from the start. +## 2. Overview 📖 -Here is our current blueprint for the DAIA and its features: -![DAIA_blueprint](Design/DAIA%20(GPT%20Vision).png) +The main difference with the DAIA compared to other AI Agents is that it interacts with your computer through a vision system (DVAI(GPT-4V)) and task completion system allowing it to be capable of doing many more tasks and goals compared to other AI Agents that use the Terminal or command prompt for interaction with the computer. +Furthermore, the DAIA will be built with a built-in memory, self-evaluating and optimizing system from the start. -## 2. Features 🌟 +## 3. Features 🌟 +(for both versions) - **Intelligent Interaction**: DAIA can interact with your PC using natural language, making it easy to communicate your needs. 
(in progress) @@ -36,7 +38,7 @@ Here is our current blueprint for the DAIA and its features: - **Customization**: Tailor DAIA to your specific needs and preferences through custom scripts and plugins. (not done yet) -- **Memory**: Each action is saved into the memory, allowing you to start where you left. (in progress) +- **Memory**: Each action is saved into the memory, allowing you to start where you left off. (in progress) - **Security**: Ensure your data and interactions are secure with robust encryption and privacy measures. (not done yet) @@ -44,24 +46,51 @@ Here is our current blueprint for the DAIA and its features: - **Automation of Big Goals**: DAIA can automate big goals with its capability to make multiple versions of itself, therefore making the process faster. (in progress) -## 3. Usage 🤝 +## 4. Versions ✨️ + +- ### DAIA-PreProgrammed +This version of DAIA is pre-programmed with a set of prompts and processes that it will follow in its programmed order. Therefore, this version is much more complex. + +Here is our current blueprint for the pre-programmed DAIA and its features: +![DAIA_preprogrammed_blueprint](Versions/DAIA_GPT4V_PreProgrammed/Design/DAIA%20(GPT%20Vision).png) + +- ### DAIA-FreeThink +This version of DAIA is much more free to complete the goals you give it in its own way, much like ChatGPT would if it had vision and the ability to interact with your computer. It is therefore much simpler in its design; however, it can still scale where possible. (memory, optimization and a functional vision system coming soon) + +Here is our current blueprint for the FreeThink DAIA and its features: +![DAIA_freethink_blueprint](Versions/DAIA_GPT4V_FreeThink/Design/DAIA.png) + +## 5. Usage 🤝 1. Install the DAIA by running `git clone https://github.com/Envedity/DAIA.git` in your desired path, or by downloading and extracting the zip file. 2. Make a python 3.11 env using the `requirements.txt` file. 3. 
Run the DAIA.py file by typing `python DAIA.py` in the DAIA directory. -4. Choose a version of the DAIA you want to use. -5. Give it a goal. -6. Let it know if you agree with what it suggests for goal compleation. -7. Sit back and let the DAIA compleate your goal all by itself +4. Choose a version of the DAIA you want to use: + +- #### DAIA-PreProgrammed version: +1. Choose `DAIA_GPT4V-PreProgrammed` by typing `1` in the prompt. +2. Give it a goal. +3. Let it know if you agree with what it suggests for goal completion. +4. Sit back and let the DAIA complete your goal all by itself + +- #### DAIA-FreeThink version (as of now): +1. Choose `DAIA_GPT4V-FreeThink` by typing `2` in the prompt. +2. Give it a goal, or other request you want it to do. +3. Wait for its response and respond, you can stop when you think your goal is completed. -## 4. Contribution 🙌 +## 6. Contribution 🙌 We welcome contributions from the DAIA community to help improve and expand the capabilities of our AI agent. It is still in its early development stage so there is a lot to be done and we urgently need your support in this effort. Here's how you can contribute: - **Join Our Discord Server**: If you're a developer or someone who is interested in contributing, please join our Discord server The Envedity Network at: https://discord.gg/V4T6QFUw9c, there you can become a developer and will be able to directly contribute to the main DAIA repo with us, as well as share your feedback, suggestions, and bug reports with us and more.. 
Your insights are valuable in shaping the future of DAIA -Here is what we have already done from the blueprint: -![progress_made_on_DAIA_blueprint](Design/DAIA%20(GPT%20Vision)%20progress.png) +Here is what we have already done from the DAIA blueprints: + +PreProgrammed: +![progress_made_on_DAIA_preprogrammed_blueprint](Versions/DAIA_GPT4V_PreProgrammed/Design/DAIA%20(GPT%20Vision)%20progress.png) + +FreeThink: +![progress_made_on_DAIA_freethink_blueprint](Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.png) - **Feedback**: Share your feedback, suggestions, and bug reports with us. You can do this by [opening an issue](https://github.com/Envedity/DAIA/issues) on our feedback repository or in our Discord server. @@ -73,8 +102,8 @@ We appreciate your support in making DAIA even better for all users! 🙏 Let's build the future of AGI Together! -## 5. License 📄 +## 7. License 📄 -DAIA is under the GNU Version 3 licence (https://fsf.org/). +DAIA is under the GNU Version 3 license (https://fsf.org/). For the most up-to-date information, visit [DAIA's official website](https://envedity.github.io/). 
🌐🚀 diff --git a/DAIA_GPT4V/DVAI/GPT_4_with_Vision.py b/Versions/DAIA_GPT4V_FreeThink/DVAI/GPT_4_with_Vision.py similarity index 98% rename from DAIA_GPT4V/DVAI/GPT_4_with_Vision.py rename to Versions/DAIA_GPT4V_FreeThink/DVAI/GPT_4_with_Vision.py index c195713..7798db7 100644 --- a/DAIA_GPT4V/DVAI/GPT_4_with_Vision.py +++ b/Versions/DAIA_GPT4V_FreeThink/DVAI/GPT_4_with_Vision.py @@ -1,4 +1,4 @@ -# DAIA - Digital Artificial Inteligence Agent +# DAIA - Digital Artificial Intelligence Agent # Copyright (C) 2023 Envedity # # This program is free software: you can redistribute it and/or modify diff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.drawio b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.drawio new file mode 100644 index 0000000..73feb01 --- /dev/null +++ b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.drawiodiff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.pdf b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.pdf new file mode 100644 index 0000000..d67ccc7 Binary files /dev/null and b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.pdf differ diff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.png b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.png new file mode 100644 index 0000000..0575927 Binary files /dev/null and b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA.png differ diff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.drawio b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.drawio new file mode 100644 index 0000000..c86cc67 --- /dev/null +++ b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.drawiodiff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.pdf b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.pdf new file mode 100644 index 0000000..76785b7 Binary files /dev/null and b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.pdf differ diff --git a/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.png b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.png new file mode 100644 index 0000000..0f3c225 Binary files /dev/null and 
b/Versions/DAIA_GPT4V_FreeThink/Design/DAIA_progress.png differ diff --git a/Versions/DAIA_GPT4V_FreeThink/Interpreter/interpreter.py b/Versions/DAIA_GPT4V_FreeThink/Interpreter/interpreter.py new file mode 100644 index 0000000..71ebf7b --- /dev/null +++ b/Versions/DAIA_GPT4V_FreeThink/Interpreter/interpreter.py @@ -0,0 +1,278 @@ +# DAIA - Digital Artificial Intelligence Agent +# Copyright (C) 2023 Envedity +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
class Interpreter:
    """
    Interpret commands of the given input content

    A command is any ``name(param, param, ...)`` occurrence in the text. Known
    names are dispatched through ``self.commands``; unknown names are ignored,
    so ordinary prose that happens to look like a call is harmless.

    Supported commands:
        'write_down': self.write_down,
        'retrieve': self.retrieve,
        'get_os_info': self.get_os_info,
        'web': self.web,
        'terminal': self.terminal,
        'click': self.click,
        'click_on_item': self.click_on_item,
        'move_cursor_to': self.move_cursor_to,
        'move_cursor_to_item': self.move_cursor_to_item,
        'keyboard': self.keyboard,
        'screenshot': self.screenshot,
        'wait': self.wait,
    """

    def __init__(self, api_key: str):
        self.api_key = api_key

        # Initialize the OpenAI client and the commands
        self.client = OpenAI(api_key=api_key)
        self.commands = {
            "write_down": self.write_down,
            "retrieve": self.retrieve,
            "get_os_info": self.get_os_info,
            "web": self.web,
            "terminal": self.terminal,
            "click": self.click,
            "click_on_item": self.click_on_item,
            "move_cursor_to": self.move_cursor_to,
            "move_cursor_to_item": self.move_cursor_to_item,
            "keyboard": self.keyboard,
            "screenshot": self.screenshot,
            "wait": self.wait,
        }

        self.os_controller = OSController()

        # Web drivers opened by the `web` command (see `web` for why they are kept)
        self.browsers = []

    def interpret_commands(self, input_content: str):
        """
        Run every known command found in input_content and collect the results.

        Returns "" when input_content contains no command-shaped text at all,
        otherwise a list with one "<name> command return: <result>" string per
        executed command (unknown command names are skipped). A command that
        raises is reported as "<name> command return: Error: ..." instead of
        aborting the remaining commands.
        """

        commands = self.extract_commands(input_content)

        # extract_commands signals "nothing found" with an empty string
        if commands == "":
            return ""

        command_returns = []
        for command_name, params in commands:
            command_function = self.commands.get(command_name)
            if command_function is None:
                continue

            try:
                command_return = self._call_command(command_function, params)
            except Exception as error:
                # The commands come from model output; a malformed one must be
                # reported back to the model rather than crash the session.
                command_return = f"Error: {type(error).__name__}: {error}"

            command_returns.append(f"{command_name} command return: {command_return}")

        return command_returns

    @staticmethod
    def _call_command(command_function, params):
        """
        Call command_function with the extracted params as positional arguments.
        """

        import inspect

        # "name()" is parsed as one empty parameter
        if not params or params == [""]:
            return command_function()

        # A single-parameter command (keyboard, terminal, screenshot, ...) takes
        # free text; if that text contained commas it was split, so glue it back.
        if len(params) > 1:
            try:
                accepted = [
                    parameter
                    for parameter in inspect.signature(command_function).parameters.values()
                    if parameter.kind
                    in (parameter.POSITIONAL_ONLY, parameter.POSITIONAL_OR_KEYWORD)
                ]
            except (TypeError, ValueError):
                accepted = []
            if len(accepted) == 1:
                params = [", ".join(params)]

        return command_function(*params)

    def extract_commands(self, input_content: str):
        """
        Extract commands from the input_content parameter

        Returns "" when nothing matches, otherwise a list of
        (command_name, [stripped parameters]) tuples in order of appearance.
        """

        # Regular expression to extract commands from user input
        pattern = r"(\w+)\(([^)]*)\)"
        matches = re.findall(pattern, input_content)
        if len(matches) == 0:
            return ""

        return [
            (name, [param.strip() for param in raw_params.split(",")])
            for name, raw_params in matches
        ]

    def write_down(self, string: str, identifier_title: str):
        """
        Save the string text information in a .txt file with the name being its identifier_title parameter to retrieve it later with the retrieve command
        """

        writedown_file = Path(
            f"DAIA/Versions/DAIA_GPT4V_Free/Memory/Writedown_files/{identifier_title}.txt"
        )
        # The directory is not guaranteed to exist on a fresh checkout
        writedown_file.parent.mkdir(parents=True, exist_ok=True)
        writedown_file.write_text(string, encoding="utf-8")

        return (
            f'Successfully wrote down: "{string}" with identifier "{identifier_title}"'
        )

    def retrieve(self, identifier: str):
        """
        Retrieve previously written down text

        Returns a feedback string either way; an unknown identifier is
        reported instead of raising.
        """

        writedown_file = Path(
            f"DAIA/Versions/DAIA_GPT4V_Free/Memory/Writedown_files/{identifier}.txt"
        )

        try:
            data = writedown_file.read_text(encoding="utf-8")
        except FileNotFoundError:
            return f'No data was written down for identifier "{identifier}"'

        return f'Retrieved data for identifier {identifier}: "{data}"'

    def get_os_info(self):
        """
        Describe the machine using the OS controller's system information
        """

        os_info = self.os_controller.get_system_info()

        return f'OS: {os_info["OS"]}, Version: {os_info["Version"]}, Architecture: {os_info["Architecture"]}, Hostname: {os_info["Hostname"]}'

    def web(self, website_url: str):
        """
        Open the website with its website_url with a Chrome webdriver using selenium
        """

        # Specifying the browser options
        options = Options()
        options.add_argument("--ignore-certificate-errors")
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        options.add_argument("--log-level=3")
        browser = webdriver.Chrome(options=options)

        # NOTE(review): keep the driver referenced; presumably selenium tears
        # the driver service down once the object is garbage-collected, which
        # would close the window as soon as this method returns - confirm.
        self.browsers.append(browser)

        browser.get(website_url)

        return f"Successfully visited {website_url} with Google Chrome"

    def terminal(self, command: str):
        """
        Run commands on the terminal and return their outputs

        Returns the command's stdout on success, otherwise an "Error: ..."
        string (non-zero exit status, unknown executable, malformed quoting).
        """

        import os
        import shlex

        try:
            # No shell is involved, so on POSIX a command line such as "ls -la"
            # has to be split into an argument list first; Windows accepts the
            # raw command line.
            if isinstance(command, str) and os.name != "nt":
                arguments = shlex.split(command)
            else:
                arguments = command

            if not arguments:
                return "Error: no command given"

            r_command = subprocess.run(arguments, capture_output=True, text=True)
        except (OSError, ValueError) as error:
            return f"Error: {error}"

        if r_command.returncode == 0:
            # Return the stdout of the command
            return r_command.stdout

        # If the command fails
        return f"Error: [{r_command.returncode}]:\n{r_command.stderr}"

    def click(self, x, y):
        """
        Click at the given screen coordinates
        """

        self.os_controller.click(x, y)

        return f"Successfully clicked at {x}x, {y}y"

    def _locate_item(self, item: str):
        """
        Ask the DVAI where item is on a fresh screenshot.

        Returns (x, y, raw_answer); x and y are None when the answer contains
        no coordinates.
        """

        location = self.screenshot(
            f"Where on the provided screenshot is {item}? Provide the location in x and y coordinates. For example: 500x 200y"
        )
        x_value, y_value = self.extract_coordinates(location)

        return x_value, y_value, location

    def click_on_item(self, item: str):
        """
        Click on an item (known by the item parameter) on the screen, by first taking a screenshot, then giving the screenshot to the DVAI, then asking it to give coordinates for the item location on the screen and, then clicking at those coordinates

        Dev note: The DVAI (GPT-4-with-Vision) currently cannot know the coordinates of what it is seeing,
        because of this we should add a coordinate map image over the screen when the screenshot is taken so that the DVAI can then know at what coordinates its item is.
        """

        x_value, y_value, location = self._locate_item(item)
        if x_value is None or y_value is None:
            return f"Could not locate {item} on the screen (vision answer: {location})"

        self.click(x_value, y_value)

        return f"Successfully clicked at {item}, that was located at {x_value}x, {y_value}y"

    def move_cursor_to(self, x, y):
        """
        Move the cursor to the given screen coordinates
        """

        self.os_controller.move_cursor_to(x, y)

        return f"Successfully moved cursor to {x}x, {y}y"

    def move_cursor_to_item(self, item: str):
        """
        Move the cursor to an item (known by the item parameter) on the screen, by first taking a screenshot, then giving the screenshot to the DVAI, then asking it to give coordinates for the item location on the screen and, then moving the cursor to those coordinates

        Dev note: The DVAI (GPT-4-with-Vision) currently cannot know the coordinates of what it is seeing,
        because of this we should add a coordinate map image over the screen when the screenshot is taken so that the DVAI can then know at what coordinates its item is.
        """

        x_value, y_value, location = self._locate_item(item)
        if x_value is None or y_value is None:
            return f"Could not locate {item} on the screen (vision answer: {location})"

        self.move_cursor_to(x_value, y_value)

        return f"Successfully moved cursor to {item}, that was located at {x_value}x, {y_value}y"

    def keyboard(self, string: str):
        """
        Type the string on the keyboard
        """

        self.os_controller.keyboard(string)

        return f'Successfully typed "{string}"'

    def screenshot(self, need: str):
        """
        Take a screenshot and ask the DVAI what you need to know (need parameter) about the screenshot, and return the DVAI's response to it
        """

        # Random-ish numeric suffix so successive screenshots get distinct names
        suffix = "".join(str(e + randint(1, 9)) for e in range(10))
        screenshot_savepath = Path(f"DAIA/Screenshots/screenshot{suffix}.png")
        screenshot_savepath.parent.mkdir(parents=True, exist_ok=True)
        self.os_controller.screenshot(screenshot_savepath)

        dvai = DVAI(self.api_key)
        response = dvai.gpt_with_vision_by_base64(screenshot_savepath, need)

        return response

    def wait(self, seconds: str):
        """
        Sleep for the given number of seconds (whole or fractional)
        """

        sleep(float(seconds))

        return f"Successfully waited {seconds} seconds"

    def extract_coordinates(self, message: str):
        """
        Extract the coordinates present in the message, for example: 500x and 200y

        Returns (x, y) as ints, or (None, None) when the message holds no
        "<digits>x ... <digits>y" pair.
        """

        # Define a regular expression pattern to match x and y values
        pattern = re.compile(r"(\d+)x.*?(\d+)y")

        # Only the first pair in the message is used
        match = pattern.search(message)
        if match is None:
            return None, None

        return int(match.group(1)), int(match.group(2))
def run_f(api_key):
    """
    Run the FreeThink DAIA chat loop.

    Each turn reads a message from the user, sends it to GPT together with
    the returns of the commands DAIA issued in its previous response, prints
    the reply, and executes any commands found in that reply. The loop never
    returns on its own.

    api_key: OpenAI API key used for the chat model and handed to the
    Interpreter.
    """

    setup()

    interpreter = Interpreter(api_key)
    client = OpenAI(
        api_key=api_key,
    )

    # System message
    # Dev note: Need to add more examples
    system_prompt = """
Your name is DAIA which stands for Digital Artificial Intelligence Agent.
You are an AI designed to complete goals from users on a remote machine. To do this, you are given many useful commands that the users have permitted you to use on the remote machine.

Here is an example, so you can understand:
command(parameter/s) [command's return] "Description and use cases"

Here are all your commands:
write_down(string, identifier_title) [feedback] "Write down text information with its identifier_title to retrieve it later, with the retrieve command. To use the command simply type write_down(string, identifier_title) in your response. The identifier parameter must be less than 30 string characters in length."
retrieve(identifier) [data] "Retrieve previously written down data by its identifier_title parameter."
get_os_info() [OS information] "Get the remote machine operating system information, such as: name of OS, Version of OS, architecture and hostname."
web(website_url) [feedback] "Go to a website using the website_url parameter, on the remote machine."
terminal(command) [terminal response] "Access the OS's terminal and run commands on it."

click(x, y) [feedback] "Click on a position on the remote machine located by the x and y coordinates parameter. 0x and 0y are at the upper left top of the remote machine and the maximum x and y are 3000x and 3000y there are no negative values. (not recommended: harder to use, because you have to provide specific x and y coordinates)"
click_on_item(item) [feedback] "Click on the item on the remote machine located by item parameter. The item parameter must be a string, for example, Google Chrome, Discord.."
move_cursor_to(x, y) [feedback] "Move the cursor on the remote machine to your desired location by the x and y coordinates parameter. 0x and 0y are at the upper left top of the remote machine and the maximum x and y are 3000x and 3000y there are no negative values. (not recommended: harder to use, because you have to provide specific x and y coordinates)"
move_cursor_to_item(item) [feedback] "Move the cursor onto the item on the remote machine located by item parameter. The item parameter must be a string, for example, Google Chrome, Discord.."
keyboard(string) [feedback] "Type the string parameter on the keyboard of the remote machine."

screenshot(need) [screenshot description] "Take a screenshot of the remote machine, and extract the information you want from it, through your need parameter. The need parameter should be a string that represents a question, for what to look for in the screenshot."
wait(amount in seconds) [feedback] "Wait for some time. Provide the amount of time you want to wait in seconds as the parameter."

The command's return will be returned in the next response of the user, like this:
command(parameter) command return: [command's return]

The user can still communicate with you in his next response, however, his message will be displayed under the command returns.
Here is an example of what I mean:

You (DAIA): "To install the app I will first need to go to its website.
web(https://exampleapp.com/download/)"

User: "web(https://exampleapp.com/download/) command return: [Successfully visited https://exampleapp.com/download/ with Chrome browser]
Good job on knowing that."

Additionally, you can also use multiple commands and you will get multiple returns for them in the next response of the user. The commands will be executed in a sequential order.
Here is an example:

You (DAIA): "To install the app I will first need to go to its website.
web(https://exampleapp.com/download/)
Then I would need to click on the download button.
click_on_item(Example app download button)
After that, I need to wait for the download to finish. I can check this by taking a screenshot.
screenshot(Is the example app download finished?)"

User: "web(https://exampleapp.com/download/) command return: [Successfully visited https://exampleapp.com/download/ with Chrome browser]
click_on_item(Example app download button) command return: [Successfully clicked on the example app download button]
screenshot(Is the example app download finished?) command return: [The example app has finished installing]"
"""
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        }
    ]

    # Returns of the commands run for DAIA's previous response. This has to
    # live outside the loop: it is produced at the end of one turn and sent
    # with the user's message at the start of the next one.
    output = ""

    while True:
        user_input = input("Your message: ")

        # Put any pending command returns above the user's message, as the
        # system prompt promises
        if output:
            user_content = f"{output}\n{user_input}"
        else:
            user_content = user_input

        messages.append(
            {
                "role": "user",
                "content": user_content,
            }
        )

        # GPT response
        response = client.chat.completions.create(
            model="gpt-4-vision-preview", messages=messages, max_tokens=3000
        )
        response = response.choices[0].message.content or ""
        print(f"\n\nDAIA: {response}\n\n")
        messages.append(
            {
                "role": "assistant",
                "content": response,
            }
        )

        # Check and run the commands in GPT's response and format them into a
        # string as an output; interpret_commands returns "" when there are none
        cmd_outputs = interpreter.interpret_commands(response) or []
        output = "".join(f"[{cmd_output}]\n" for cmd_output in cmd_outputs)

        if output:
            print(output)
class DVAI:
    """
    Digital Vision Artificial Intelligence

    Thin wrapper around the GPT-4-with-Vision chat endpoint: ask a question
    (context) about an image given either by URL or by local file path.
    """

    def __init__(self, key: str):
        self.api_key = key

        self.client = OpenAI(
            api_key=key,
        )

    def gpt_with_vision_by_url(self, image_url, context: str):
        """
        Ask context about the image at image_url and return the model's answer text.
        """

        response = self.client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": context},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": image_url,
                            },
                        },
                    ],
                }
            ],
        )

        return response.choices[0].message.content

    def gpt_with_vision_by_base64(self, image_path: str, context: str):
        """
        Ask context about the local image at image_path and return the model's answer text.

        The image is sent inline as a base64 data URL. Raises KeyError (with
        the API's error payload in the message) when the response carries no
        "choices".
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

        payload = {
            "model": "gpt-4-vision-preview",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": context},
                        {
                            "type": "image_url",
                            "image_url": {"url": self._image_data_url(image_path)},
                        },
                    ],
                }
            ],
            "max_tokens": 1000,
        }

        response = requests.post(
            "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
        )

        data = response.json()
        if "choices" not in data:
            # Same exception type as the plain lookup would raise, but with the
            # reason the API gave instead of just the missing key
            raise KeyError(f"choices (API response: {data.get('error', data)})")

        return data["choices"][0]["message"]["content"]

    def _image_data_url(self, image_path):
        """
        Build the base64 data URL for the image, labelled with its real media type.
        """

        import mimetypes

        # Screenshots are saved as .png, so the type must follow the file
        # extension; image/jpeg stays the fallback for unknown extensions
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        return f"data:{mime_type};base64,{self.encode_image(image_path)}"

    def encode_image(self, image_path):
        """
        Return the file's bytes as a base64 string.
        """

        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
--git a/Design/DAIA.png b/Versions/DAIA_GPT4V_PreProgrammed/Design/DAIA.png similarity index 100% rename from Design/DAIA.png rename to Versions/DAIA_GPT4V_PreProgrammed/Design/DAIA.png diff --git a/Design/DVAI_plugin.drawio b/Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.drawio similarity index 100% rename from Design/DVAI_plugin.drawio rename to Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.drawio diff --git a/Design/DVAI_plugin.pdf b/Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.pdf similarity index 100% rename from Design/DVAI_plugin.pdf rename to Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.pdf diff --git a/Design/DVAI_plugin.png b/Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.png similarity index 100% rename from Design/DVAI_plugin.png rename to Versions/DAIA_GPT4V_PreProgrammed/Design/DVAI_plugin.png diff --git a/DAIA_GPT4V/Memory/memory.py b/Versions/DAIA_GPT4V_PreProgrammed/Memory/memory.py similarity index 98% rename from DAIA_GPT4V/Memory/memory.py rename to Versions/DAIA_GPT4V_PreProgrammed/Memory/memory.py index 713f375..3426165 100644 --- a/DAIA_GPT4V/Memory/memory.py +++ b/Versions/DAIA_GPT4V_PreProgrammed/Memory/memory.py @@ -1,4 +1,4 @@ -# DAIA - Digital Artificial Inteligence Agent +# DAIA - Digital Artificial Intelligence Agent # Copyright (C) 2023 Envedity # # This program is free software: you can redistribute it and/or modify diff --git a/Versions/DAIA_GPT4V_PreProgrammed/OS_control/os_controller.py b/Versions/DAIA_GPT4V_PreProgrammed/OS_control/os_controller.py new file mode 100644 index 0000000..16e290a --- /dev/null +++ b/Versions/DAIA_GPT4V_PreProgrammed/OS_control/os_controller.py @@ -0,0 +1,55 @@ +# DAIA - Digital Artificial Intelligence Agent +# Copyright (C) 2023 Envedity +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your 
class OSController:
    """
    Control the OS using pyautogui, and get its data with the platform library
    """

    def __init__(self):
        # Read the cursor position once so x and y come from the same sample
        # (two separate position() calls could straddle a mouse movement).
        self.x, self.y = pyautogui.position()

    def click(self, x, y):
        """
        Click at screen coordinates (x, y); both values are coerced with int()
        """
        pyautogui.click(int(x), int(y))

    def move_cursor_to(self, x, y):
        """
        Move the cursor to (x, y) over 0.2 seconds; values are coerced with int()
        """
        pyautogui.moveTo(int(x), int(y), duration=0.2)

    def keyboard(self, string):
        """
        Type the given string through pyautogui
        """
        pyautogui.typewrite(string)

    def scroll(self, direction, amount):
        """
        Scroll the mouse wheel by `amount` clicks

        direction: "up" or "down"; any other value is ignored (no scrolling)
        amount: number of wheel clicks, coerced with int() like the
                coordinates accepted by click() and move_cursor_to()
        """
        clicks = int(amount)

        # pyautogui.scroll() treats a positive value as "up" and a negative
        # value as "down", so "up" must pass the positive click count.
        if direction == "up":
            pyautogui.scroll(clicks)
        elif direction == "down":
            pyautogui.scroll(-clicks)

    def screenshot(self, path):
        """
        Capture the screen and save the image to `path`
        """
        image = pyautogui.screenshot()
        image.save(path)

    def get_system_info(self):
        """
        Return a dict with the keys "OS", "Version", "Architecture" and
        "Hostname", filled from the platform module
        """
        return {
            "OS": platform.system(),
            "Version": platform.release(),
            # NOTE(review): platform.processor() is the processor name and may
            # be an empty string; platform.machine() is the machine type.
            # Confirm which one consumers of "Architecture" expect.
            "Architecture": platform.processor(),
            "Hostname": platform.node(),
        }
2023 Envedity # # This program is free software: you can redistribute it and/or modify @@ -14,9 +14,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from DAIA_GPT4V.Memory.memory import Memory -from DAIA_GPT4V.OS_control.os_controller import OSController -from DAIA_GPT4V.DVAI.GPT_4_with_Vision import DVAI +from Versions.DAIA_GPT4V_PreProgrammed.Memory.memory import Memory +from Versions.DAIA_GPT4V_PreProgrammed.OS_control.os_controller import OSController +from Versions.DAIA_GPT4V_PreProgrammed.DVAI.GPT_4_with_Vision import DVAI from utils.setup import setup from openai import OpenAI from pathlib import Path diff --git a/DAIA_GPT4V/run.py b/Versions/DAIA_GPT4V_PreProgrammed/run.py similarity index 98% rename from DAIA_GPT4V/run.py rename to Versions/DAIA_GPT4V_PreProgrammed/run.py index 23127ba..53a401b 100644 --- a/DAIA_GPT4V/run.py +++ b/Versions/DAIA_GPT4V_PreProgrammed/run.py @@ -1,4 +1,4 @@ -# DAIA - Digital Artificial Inteligence Agent +# DAIA - Digital Artificial Intelligence Agent # Copyright (C) 2023 Envedity # # This program is free software: you can redistribute it and/or modify @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from DAIA_GPT4V.Thinker.thinking import Think +from Versions.DAIA_GPT4V_PreProgrammed.Thinker.thinking import Think from openai import OpenAI diff --git a/Design/DAIA_dev_plan.drawio b/Versions/Plan/DAIA_dev_plan.drawio similarity index 100% rename from Design/DAIA_dev_plan.drawio rename to Versions/Plan/DAIA_dev_plan.drawio diff --git a/Design/DAIA_dev_plan.pdf b/Versions/Plan/DAIA_dev_plan.pdf similarity index 100% rename from Design/DAIA_dev_plan.pdf rename to Versions/Plan/DAIA_dev_plan.pdf diff --git a/Design/DAIA_dev_plan.png b/Versions/Plan/DAIA_dev_plan.png similarity index 100% rename from Design/DAIA_dev_plan.png rename to Versions/Plan/DAIA_dev_plan.png diff --git a/utils/progress_indicator.py b/utils/progress_indicator.py new file mode 100644 index 0000000..aa321bd --- /dev/null +++ b/utils/progress_indicator.py @@ -0,0 +1,45 @@ +# DAIA - Digital Artificial Intelligence Agent +# Copyright (C) 2023 Envedity +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
class ProgressIndicator:
    """
    Show a one-line text spinner on stdout while work is in progress
    """

    # Frames returned by get_spinner(), in display order
    _SPINNER_FRAMES = ("-", "\\", "|", "/")

    def __init__(self, prefix="Thinking", suffix="Complete"):
        # prefix is printed before the spinner frame on every update()
        self.prefix = prefix
        # suffix and start_time are stored but not read by any method here
        self.suffix = suffix
        self.start_time = time.time()

    def update(self, iteration):
        """
        Redraw the spinner line for the given iteration number
        """
        frame = self.get_spinner(iteration)

        # "\r" moves back to the start of the line so each frame overwrites
        # the previous one instead of printing a new line.
        sys.stdout.write(f"\r{self.prefix} {frame}")
        sys.stdout.flush()

    def get_spinner(self, iteration):
        """
        Return the spinner frame for `iteration`, cycling through the frames
        """
        return self._SPINNER_FRAMES[iteration % len(self._SPINNER_FRAMES)]


def _demo(total_iterations=100, delay=0.1):
    """
    Animate the spinner for total_iterations + 1 frames, sleeping `delay`
    seconds before each frame
    """
    indicator = ProgressIndicator(prefix="Thinking", suffix="Complete")

    for i in range(total_iterations + 1):
        time.sleep(delay)
        indicator.update(i)

    sys.stdout.flush()


# Only run the demo when executed as a script; importing this module must not
# block the caller for ~10 seconds or write to stdout.
if __name__ == "__main__":
    _demo()