In [11]:
import google.generativeai as genai
import ast
import os
import json
import PIL.Image
from selenium.webdriver.common.by import By

# Load environment variables
from dotenv import load_dotenv
from selenium_functions import open_browser

load_dotenv()

# GEMINI_API_KEYS must hold a Python list literal of key strings,
# e.g. GEMINI_API_KEYS='["key-one", "key-two"]'; it is parsed with ast.literal_eval.
GEMINI_API_KEYS = os.environ.get("GEMINI_API_KEYS")
if GEMINI_API_KEYS is None:
    # Without this guard literal_eval(None) fails with the opaque
    # "malformed node or string: None".
    raise ValueError(
        "GEMINI_API_KEYS is not set; define it in the environment "
        "as a list literal of API keys, e.g. '[\"key-one\", \"key-two\"]'."
    )
KEY_LIST = ast.literal_eval(GEMINI_API_KEYS)
if not isinstance(KEY_LIST, (list, tuple)) or not KEY_LIST:
    # A bare quoted string would otherwise be indexed character by character.
    raise ValueError(
        "GEMINI_API_KEYS must evaluate to a non-empty list of API keys, "
        f"got {type(KEY_LIST).__name__}."
    )

# Global index to keep track of the current key
current_api_key_index = 0




In [2]:
from lxml import html


def extract_elements_by_xpath(html_string, xpath_selector):
    """Serialize every node matched by an XPath expression.

    Args:
        html_string: Markup that is parsed with ``html.fromstring``.
        xpath_selector: XPath expression evaluated against the parsed tree.

    Returns:
        The ``str()`` of a list holding the UTF-8 decoded ``html.tostring``
        output of each match. Note that this is the textual representation
        of the list, not the list itself.
    """
    document = html.fromstring(html_string)

    serialized = []
    for node in document.xpath(xpath_selector):
        # tostring() yields bytes; decode so the list holds plain strings.
        serialized.append(html.tostring(node).decode("utf-8"))

    return str(serialized)

In [4]:
# System instruction for the "interpret" request: the model is given the user's
# request, the page source and a screenshot, and is asked to answer with a list
# of {"type", "selector"} objects. The reply is later parsed with json.loads.
# NOTE(review): the example list below ends with a trailing comma after the last
# object, which is not valid JSON; confirm the model does not copy it verbatim.
# NOTE(review): the text mentions a "navigator action to switch URLs" but no
# output format is given for it; confirm whether that case is still intended.
system_prompt_interpret = """
You are a web browser navigation assistant that trims and scrapes relevant portions of the UI for a user.

Whenever a user requests something, you will return a navigator action to switch URLs, or an xpath or id selector to the relevant parts of the UI for a user to look at.

You will only return path or id selectors. If the request requires multiple choices, return ALL RELEVANT selectors that contains the UI that will enable the user to choose the choice themselves.

For example, if there is a container containing two buttons, and it is ambiguous which button the user is interested in, return a selector to the container instead of one of the buttons only.

Output your result in the following format:

If the user is interested in a specific part of the UI, output your result in the following format:
[
    {
        "type": either "xpath" or "id",
        "selector": the selection string
    },
    {
        "type": either "xpath" or "id",
        "selector": the selection string
    },
]
"""

# System instruction for the "generate" request: the model receives the DOM
# fragments selected by the interpret step and is asked to answer with
# Tailwind-styled HTML wrapped in a single <div>.
system_prompt_generate = """
You are a web browser navigation assistant that generates a user interface for a user to interact with.
You will be given dom elements from another web browser navigation assistant that trims and scrapes relevant portions of the UI for a user.

Your task is to generate valid html strings that can be rendered in a browser.
Please use tailwindcss for styling.

Output your result in the following format:
<div>
    ...
</div>
"""

In [14]:
def cycle_api_key():
    """Move the shared key index forward and return the key it now points at.

    The module-level ``current_api_key_index`` is advanced *before* the key is
    read, wrapping back to 0 once it has reached the last position of
    ``KEY_LIST``.

    Returns:
        The entry of ``KEY_LIST`` at the updated index.
    """
    global current_api_key_index
    last_index = len(KEY_LIST) - 1
    # Wrap to the start when already at (or past) the final key.
    current_api_key_index = (
        0 if current_api_key_index >= last_index else current_api_key_index + 1
    )
    return KEY_LIST[current_api_key_index]


def generate_content_with_cycling_keys(prompt, system_prompt, image=None):
    """Send one request to Gemini using the next API key in the rotation.

    Args:
        prompt: Text of the user turn.
        system_prompt: Text passed to the model as ``system_instruction``.
        image: Optional image sent alongside the prompt. When ``None``
            (the default) only the text prompt is sent, so text-only callers
            do not have to supply a placeholder.

    Returns:
        The ``text`` attribute of the model response.
    """
    # Rotate to the next key before every request.
    api_key = cycle_api_key()

    # Configure the generative AI model with the new API key
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(
        "gemini-1.5-pro-latest",
        generation_config=genai.GenerationConfig(
            max_output_tokens=8000,
            temperature=0,  # no sampling randomness requested
        ),
        system_instruction=system_prompt,
    )

    # Only attach the image part when one was actually provided.
    contents = [prompt] if image is None else [prompt, image]
    response = model.generate_content(contents)
    return response.text

In [13]:
# Offline alternative (disabled): read a previously saved copy of the page
# instead of driving a live browser.
# with open('website.html', 'r') as file:
#     website_html = file.read()
    
# Start a browser session through the local selenium_functions helper.
browser = open_browser()

# Navigate to the page the assistant should reason about.
browser.get("https://www.dominos.com/en/")

# Capture the page markup; it is embedded verbatim in the prompt below.
website_html = browser.page_source

# Save a screenshot to disk and reopen it as a PIL image for the model.
# NOTE(review): `screenshot` is not referenced again in the visible cells.
screenshot = browser.save_screenshot("website.png")
img = PIL.Image.open('website.png')

# User turn: the request plus the full page source; the screenshot travels
# as a separate part of the request.
user_prompt = f"""
user: I want to order a pizza

current_page: {website_html}

current_screenshot is attached
"""
    
# Ask the model which parts of the page are relevant to the request.
response = generate_content_with_cycling_keys(user_prompt, system_prompt_interpret, img)

AttributeError: 'WebDriver' object has no attribute 'get_full_page_screenshot_as_base64'

In [26]:
# Parse the model reply as JSON; the interpret prompt asks for a list of
# {"type", "selector"} objects.
obj = json.loads(response)
# Bare expression so the notebook displays the parsed value.
obj

[{'type': 'xpath',
  'selector': '//a[@data-quid="start-your-order-delivery-cta"]'},
 {'type': 'xpath',
  'selector': '//a[@data-quid="start-your-order-carryout-cta"]'}]

In [33]:
# Collect the serialized markup for every selector the model returned,
# one selector's result per line.
dom_fragments = []
for element in obj:
    if element['type'] == 'xpath':
        xpath_selector = element["selector"]
    elif element['type'] == 'id':
        # The interpret prompt allows "id" selectors, so translate them to an
        # equivalent XPath instead of silently dropping them.
        element_id = element["selector"]
        # Pick a quote character that does not occur in the id value.
        quote = "'" if '"' in element_id else '"'
        xpath_selector = f"//*[@id={quote}{element_id}{quote}]"
    else:
        # Unknown selector types are skipped, as before.
        continue
    dom_fragments.append(extract_elements_by_xpath(website_html, xpath_selector))

dom_elements = "".join(fragment + "\n" for fragment in dom_fragments)

In [34]:
# Inspect the collected fragments; print() renders the embedded newlines.
print(dom_elements)

['<a data-dpz-track-evt-name="SYO_Delivery" data-quid="start-your-order-delivery-cta" href="/en/restaurants?type=Delivery" class="css-14js2j3">Delivery</a>']
['<a data-dpz-track-evt-name="SYO_Carryout" data-quid="start-your-order-carryout-cta" href="/en/restaurants?type=Carryout" class="css-14js2j3">Carryout</a>']



In [39]:
# Second request: hand the extracted DOM fragments to the model together with
# the "generate" system prompt. No screenshot is passed for this request.
response = generate_content_with_cycling_keys(dom_elements, system_prompt_generate)

In [40]:
# Show the raw text returned by the generate request.
print(response)

```html
<div class="flex justify-center gap-4">
    <a data-dpz-track-evt-name="SYO_Delivery" data-quid="start-your-order-delivery-cta" href="/en/restaurants?type=Delivery" class="css-14js2j3 bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded">
        Delivery
    </a>
    <a data-dpz-track-evt-name="SYO_Carryout" data-quid="start-your-order-carryout-cta" href="/en/restaurants?type=Carryout" class="css-14js2j3 bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded">
        Carryout
    </a>
</div>
``` 

