In [67]:
import google.generativeai as genai
import ast
import os
import json
import PIL.Image
from selenium.webdriver.common.by import By

# Load environment variables
from dotenv import load_dotenv
from selenium_functions import open_browser

load_dotenv()

# Convert the GEMINI_API_KEYS string from environment variables to a list
GEMINI_API_KEYS = os.environ.get("GEMINI_API_KEYS")
KEY_LIST = ast.literal_eval(GEMINI_API_KEYS)

# Randomly shuffle the list of API keys
import random
random.shuffle(KEY_LIST)

# Global index to keep track of the current key
current_api_key_index = 0

In [68]:
from lxml import html


def extract_elements_by_xpath(html_string, xpath_selector):
    # Parse the HTML
    tree = html.fromstring(html_string)

    # Apply the XPath selector
    elements = tree.xpath(xpath_selector)

    # Return a list of outer HTML for each element
    return str([html.tostring(element).decode("utf-8") for element in elements] + [xpath_selector])

In [69]:
design_schema = """
Color Palette:
Text Color: #2B2B2B
Input Text Color: #A5A5A5
Paragraph Color: #646464
Background: #DEDEDE
Primary Color: #9F03FE
Hover: #8200D1

Typography:
heading
color: #2B2B2B;
font-family: Inter;
font-weight: 700;

subheadings
color: #646464;
font-family: Poppins;
font-weight: 600;

paragraph
color: #646464;
font-family: Poppins;
font-weight: 500;

subparagraphs
color: #646464;
font-family: Poppins;
font-weight: 500;

Components:

primary button
border-radius: 0.5rem;
background: #9F03FE;

secondary button
border-radius: 0.5rem;
border: 3px solid #DEDEDE;
background: #F5F4F7;

input:
border-radius: 0.5rem;
border: 3px solid #DEDEDE;
background: #FFF;
"""

In [70]:
# goal: "type": "src", "selector": "some url I think"
system_prompt_interpret = """
You are a web browser navigation assistant that trims and scrapes relevant portions of the UI for a user. Relevant is defined as the portion of the UI that the user requests for.
Only return selectors or images that are relevant to the user's request.
The selectors should only be for button, input, or text elements.

Whenever a user requests something, you will return the xpath selector or the src attribute of an image that returns the path to the specific file the image is stored in within the client file of the website.
Ensure that all paths end with the file extension of the image (examples are .jpg, .png, .gif, etc.)

If the request requires multiple choices, return ALL RELEVANT selectors that contains the UI that will enable the user to choose the choice themselves.
For example, if there are input fields related to the user's request, return all input fields that are relevant to the user's request.
If there are both buttons and input fields that are relevant to the user's request, return all buttons and input fields that are relevant to the user's request.

Output your result in the following format and output as many selectors as necessary. Ensure that the output is a JSON object and that there is a diversity of file paths aligned to the specific types of each image:
[
    {
        "type": xpath
        "selector": selector
    },
    {
        "type": src
        "selector": the src attribute of the image represented as the route to the image inside of the client file of the website
    },
    ...
]
"""

system_prompt_generate = """
You are a web browser navigation assistant that generates a user interface for a user to interact with.
You will be given DOM elements from another web browser navigation assistant that trims and scrapes relevant portions of the UI for a user.

Your task is to generate valid HTML strings that can be rendered in a browser, specifically focusing on interactive elements such as buttons and text fields. 
Please use TailwindCSS for styling. Use actual hex colors for the colors, do not use TailwindCSS classes for colors.

Each element should have an additional two attributes:
- class: a string of classes separated by spaces, for TailwindCSS styling
- special-id: the XPath or id selector that was given to you, which will be used for identifying the element during interactions

Remove all non visual attributes from the elements, such as aria labels or data attributes.

Only output images if they are contained in the DOM elements that were given to you.
Only output div, button, input, select, and img elements. Do not output any other elements.

Output your result in the following format:
<div class='container classes here'>
    <div class='input classes here'>
       ...
    </div>
    <button class='button classes here' special-id='button selector here'">
        ...
    </button>
    <input class='input classes here' special-id='input selector here'>
    <img class='img classes here' src='image source here'>
</div>
"""

In [71]:
def cycle_api_key():
    global current_api_key_index
    if current_api_key_index >= len(KEY_LIST) - 1:
        current_api_key_index = 0
    else:
        current_api_key_index += 1
    return KEY_LIST[current_api_key_index]


def generate_content_with_cycling_keys(prompt, system_prompt, image=None):
    global current_api_key_index
    # Get the current API key and cycle to the next one for future requests
    api_key = cycle_api_key()

    # Configure the generative AI model with the new API key
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(
        "gemini-1.5-pro-latest",
        generation_config=genai.GenerationConfig(
            max_output_tokens=8000,
            temperature=0,
        ),
        system_instruction=system_prompt,
    )

    # Generate content using the provided prompt
    if image is None:
        response = model.generate_content(prompt, request_options={"timeout": 1000})
    else:
        response = model.generate_content(
            [prompt, image], request_options={"timeout": 1000}
        )
    return response.text

In [72]:
# Load website.html into a string
with open('delivery.html', 'r') as file:
    html_string = file.read()

user_prompt = f"""
current_page: {html_string}
Output selectors for relevant elements (divs, inputs, and images) that are relevant to the user's request.x

user: I would like to order a pizza for delivery.
"""

# Generate content using the prompt and the website HTML
response = generate_content_with_cycling_keys(user_prompt, system_prompt_interpret)
if "```json" in response:
    response = response.split("```json")[1].split("```")[0]
obj = json.loads(response)
obj

[{'type': 'xpath', 'selector': "//button[@id='tab-Delivery']"},
 {'type': 'xpath', 'selector': "//input[@id='Street']"},
 {'type': 'xpath', 'selector': "//input[@id='UnitNumber']"},
 {'type': 'xpath', 'selector': "//input[@id='PostalCode']"},
 {'type': 'xpath', 'selector': "//input[@id='City']"},
 {'type': 'xpath', 'selector': "//select[@id='Region']"},
 {'type': 'src',
  'selector': '/static/1.88.2/images/metaTags/dominos_social_logo.jpg'}]

In [73]:
dom_elements = ""
for element in obj:
    if element['type'] == 'xpath':
        dom_elements += extract_elements_by_xpath(html_string, element["selector"])
        dom_elements += "\n"
    else:
        dom_elements += f"src: {element['selector']}\n"

In [74]:
print(dom_elements)

['<button tabindex="0" role="tab" type="button" class="css-rssjq3" id="tab-Delivery" aria-selected="true" data-index="0" aria-labelledby="tab-Delivery-label" aria-describedby="tab-Delivery-description" data-quid="Delivery-input"><span><span class="css-n1yrf8"><svg aria-hidden="true" focusable="false" xmlns="http://www.w3.org/2000/svg" viewbox="0 0 35 35" class="css-14nm7ao"><g><path d="M9.27,21.03c0,1.21-.98,2.19-2.19,2.19s-2.19-.98-2.19-2.19,.98-2.19,2.19-2.19,2.19,.98,2.19,2.19"></path><path d="M30.29,21.03c0,1.21-.98,2.19-2.19,2.19s-2.19-.98-2.19-2.19,.98-2.19,2.19-2.19,2.19,.98,2.19,2.19"></path><path d="M32.98,33.18h-3.03c-1.02,0-1.85-.83-1.85-1.85v-1.09c0-.06-.05-.11-.11-.11H7.3c-.06,0-.11,.05-.11,.11v1.09c0,1.02-.83,1.85-1.85,1.85H2.32c-1.02,0-1.85-.83-1.85-1.85v-11.06c0-.68,.24-1.65,.55-2.25l1.58-3.01h-.75c-1.02,0-1.85-.83-1.85-1.85v-.93c0-1.02,.83-1.85,1.85-1.85h2.72c.21,0,.56-.21,.66-.4l2.41-4.59c.4-.76,1.35-1.33,2.2-1.33h15.61c.86,0,1.8,.57,2.2,1.33l2.41,4.59c.1,.19,.44,.4,.

In [75]:
design = PIL.Image.open("design.png")
response = generate_content_with_cycling_keys(
    design_schema
    + "\n\n"
    + dom_elements
    + "\n\n"
    + '"Only output div, button, input, img, and select elements. Do not use Tailwind Classes\nBase Url for images (if any): https://dominos.com \n\n',
    system_prompt_generate,
)

In [76]:
print(response)

```html
<div class='container'>
    <button class='rounded-lg bg-[#9F03FE] hover:bg-[#8200D1] px-4 py-2 text-white font-bold' special-id='//button[@id='tab-Delivery']'>
        Delivery
    </button>
    <div class='input flex flex-col gap-4'>
       <input class='border rounded-lg px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='Street']' placeholder='Street'>
       <input class='border rounded-lg px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='UnitNumber']' placeholder='Unit Number'>
       <input class='border rounded-lg px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='PostalCode']' placeholder='Postal Code'>
       <input class='border rounded-lg px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='City']' placeholder='City'>
    </div>
    <select class='border rounded-lg px-3 py-2 text-[#A5A5A5]' special-id='//select[@id='Region']'>
        <option value="">Select A State</option>
        <option value="AK">AK</option>
        <option value="AL">AL</option>
        <option 