In [21]:
import google.generativeai as genai
import ast
import os
import json

# Load environment variables
from dotenv import load_dotenv
from selenium_functions import scrapeByXPath, scrapeById

load_dotenv()

# GEMINI_API_KEYS must hold a Python list literal of key strings,
# e.g. GEMINI_API_KEYS='["key-one", "key-two"]'.
GEMINI_API_KEYS = os.environ.get("GEMINI_API_KEYS")
if not GEMINI_API_KEYS:
    # Fail early with an actionable message instead of letting
    # ast.literal_eval(None) raise an opaque "malformed node or string" error.
    raise ValueError(
        "GEMINI_API_KEYS is not set. Define it in the environment or in .env as a "
        "list literal, e.g. GEMINI_API_KEYS='[\"key-one\", \"key-two\"]'."
    )

# literal_eval only accepts Python literals, so the value is parsed, never executed.
KEY_LIST = ast.literal_eval(GEMINI_API_KEYS)
if isinstance(KEY_LIST, tuple):
    KEY_LIST = list(KEY_LIST)
if (
    not isinstance(KEY_LIST, list)
    or not KEY_LIST
    or not all(isinstance(key, str) for key in KEY_LIST)
):
    # A bare string would otherwise be indexed character by character, and an
    # empty list would only fail later inside cycle_api_key(). The message
    # reports the type only so that key material never ends up in cell output.
    raise ValueError(
        "GEMINI_API_KEYS must evaluate to a non-empty list of strings, "
        f"got {type(KEY_LIST).__name__}."
    )

# Global index to keep track of the current key
current_api_key_index = 0




In [67]:
from lxml import html


def extract_elements_by_xpath(html_string, xpath_selector):
    """Serialize every node matched by an XPath selector in an HTML document.

    Args:
        html_string: HTML markup to parse.
        xpath_selector: XPath expression evaluated against the parsed document.

    Returns:
        The ``str()`` of a list with one entry per match, followed by
        ``xpath_selector`` itself as the final entry. Matched elements are
        rendered as their outer HTML; attribute, text and scalar results are
        rendered as their plain string value.
    """
    # Parse the HTML
    tree = html.fromstring(html_string)

    # Apply the XPath selector
    matches = tree.xpath(xpath_selector)
    if not isinstance(matches, list):
        # Scalar expressions such as count(...) return a bare value, not a list
        matches = [matches]

    serialized = []
    for match in matches:
        if isinstance(match, (str, bool, int, float)):
            # Attribute/text selections and scalars cannot be passed to tostring()
            serialized.append(str(match))
        else:
            # with_tail=False keeps text that follows the closing tag out of the
            # result, so each entry is only the element's own outer HTML;
            # encoding="unicode" returns str directly, so no decode step is needed.
            serialized.append(html.tostring(match, encoding="unicode", with_tail=False))

    # The selector is appended as the last entry so the markup stays paired with it
    return str(serialized + [xpath_selector])

In [76]:
# System instruction for the first model call: given a user request and the
# current page, the model answers with a list of selectors. The reply is parsed
# with json.loads, so the example below avoids a trailing comma.
system_prompt_interpret = """
You are a web browser navigation assistant that trims and scrapes relevant portions of the UI for a user.

Whenever a user requests something, you will return a navigator action to switch URLs, or an xpath or id selector to the relevant parts of the UI for a user to look at.

You will only return xpath or id selectors. If the request requires multiple choices, return ALL RELEVANT selectors that contain the UI that will enable the user to choose the choice themselves.

For example, if there is a container containing two buttons, and it is ambiguous which button the user is interested in, return a selector to the container instead of one of the buttons only.

Output your result in the following format:

If the user is interested in a specific part of the UI, output your result in the following format:
[
    {
        "type": either "xpath" or "id",
        "selector": the selection string
    },
    {
        "type": either "xpath" or "id",
        "selector": the selection string
    }
]
"""

# System instruction for the second model call: given the scraped DOM elements
# and their selectors, the model answers with TailwindCSS-styled HTML.
system_prompt_generate = """
You are a web browser navigation assistant that generates a user interface for a user to interact with.
You will be given DOM elements from another web browser navigation assistant that trims and scrapes relevant portions of the UI for a user.

Your task is to generate valid HTML strings that can be rendered in a browser, specifically focusing on interactive elements such as buttons and text fields. 
Please use TailwindCSS for styling.

Each element should only have the following attributes:
- class: a string of classes separated by spaces, for TailwindCSS styling
- special-id: the XPath or id selector that was given to you, which will be used for identifying the element during interactions
- type: a string that is either 'text' or 'button' or 'input'

Output your result in the following format:
<div>
    <div type='text' class='input classes here'>
        <!-- Additional content here -->
    </div>
    <div type='button' class='button classes here' special-id='button selector here'>
        <!-- Additional content here -->
    </div>
    <input type='input' class='input classes here' special-id='input selector here'>
</div>
"""

In [38]:
def cycle_api_key():
    """Move the module-level key index forward one slot and return that key.

    The index is reset to 0 once it has reached (or passed) the last position
    of KEY_LIST; otherwise it is incremented by one. The key stored at the
    updated index is returned.
    """
    global current_api_key_index
    last_slot = len(KEY_LIST) - 1
    # Wrap around at the end of the list, otherwise step forward
    current_api_key_index = (
        0 if current_api_key_index >= last_slot else current_api_key_index + 1
    )
    return KEY_LIST[current_api_key_index]


def generate_content_with_cycling_keys(prompt, system_prompt):
    """Send ``prompt`` to Gemini using the next API key in the rotation.

    Args:
        prompt: Content handed to ``generate_content``.
        system_prompt: Text supplied as the model's system instruction.

    Returns:
        The ``text`` attribute of the model response.
    """
    # cycle_api_key() advances the shared index first and returns the key at
    # the new position; that key is used for this request.
    genai.configure(api_key=cycle_api_key())

    # Deterministic sampling with a capped response length
    generation_settings = genai.GenerationConfig(
        max_output_tokens=8000,
        temperature=0,
    )
    model = genai.GenerativeModel(
        "gemini-1.5-pro-latest",
        generation_config=generation_settings,
        system_instruction=system_prompt,
    )

    # Run the request and hand back only the text of the reply
    return model.generate_content(prompt).text

In [24]:
# Load website.html into a string. The encoding is stated explicitly so the
# page decodes the same way on every platform instead of depending on the
# locale default (assumes the saved page is UTF-8 - adjust if it is not).
with open('website.html', 'r', encoding='utf-8') as html_file:
    website_html = html_file.read()

# The user request and the full page markup are sent together as one prompt
user_prompt = f"""
user: I want to order a pizza

current_page: {website_html}
"""

# Ask the model which parts of the page are relevant to the request
response = generate_content_with_cycling_keys(user_prompt, system_prompt_interpret)

In [26]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if stripped.startswith("```"):
        # Drop the whole opening fence line, which may carry a language tag
        first_newline = stripped.find("\n")
        stripped = stripped[first_newline + 1:] if first_newline != -1 else ""
        if stripped.rstrip().endswith("```"):
            stripped = stripped.rstrip()[:-3]
    return stripped.strip()


# The model may wrap its reply in ```json ... ``` fences, which json.loads
# rejects, so any fence is removed before parsing.
obj = json.loads(_strip_code_fence(response))
obj

[{'type': 'xpath',
  'selector': '//a[@data-quid="start-your-order-delivery-cta"]'},
 {'type': 'xpath',
  'selector': '//a[@data-quid="start-your-order-carryout-cta"]'}]

In [68]:
def _xpath_for_id(element_id):
    """Build an XPath expression matching the element whose id attribute equals ``element_id``."""
    # XPath 1.0 string literals have no escape syntax, so use whichever quote
    # character does not occur in the id.
    if '"' not in element_id:
        return f'//*[@id="{element_id}"]'
    if "'" not in element_id:
        return f"//*[@id='{element_id}']"
    # Both quote characters occur: stitch the literal together with concat()
    pieces = ", '\"', ".join(f'"{part}"' for part in element_id.split('"'))
    return f"//*[@id=concat({pieces})]"


# Collect one serialized chunk per selector returned by the model.
dom_chunks = []
for element in obj:
    if element['type'] == 'xpath':
        selector = element["selector"]
    elif element['type'] == 'id':
        # id selectors used to be dropped silently; look them up through the
        # equivalent XPath instead. The chunk then reports that XPath as its selector.
        selector = _xpath_for_id(element["selector"])
    else:
        # Entries of any other type are skipped
        continue
    dom_chunks.append(extract_elements_by_xpath(website_html, selector) + "\n")

# Each chunk ends with a newline, so the joined text has one line per selector
dom_elements = "".join(dom_chunks)

In [69]:
# Show the collected text: one line per selector, each line being the string
# form of a list of matched markup followed by the selector itself.
print(dom_elements)

['<a data-dpz-track-evt-name="SYO_Delivery" data-quid="start-your-order-delivery-cta" href="/en/restaurants?type=Delivery" class="css-14js2j3">Delivery</a>', '//a[@data-quid="start-your-order-delivery-cta"]']
['<a data-dpz-track-evt-name="SYO_Carryout" data-quid="start-your-order-carryout-cta" href="/en/restaurants?type=Carryout" class="css-14js2j3">Carryout</a>', '//a[@data-quid="start-your-order-carryout-cta"]']



In [77]:
# Second model call: turn the scraped elements into HTML using the generation prompt.
# NOTE(review): this rebinds `response`, which previously held the selector JSON;
# re-running the json.loads cell after this one would try to parse this reply instead.
response = generate_content_with_cycling_keys(dom_elements, system_prompt_generate)

In [78]:
# Display the raw model reply.
# NOTE(review): in the saved output the HTML is wrapped in a Markdown code fence
# and uses `id` rather than `special-id`, so it would need cleaning before rendering.
print(response)

```html
<div>
    <div class='hover:bg-gray-100 px-2 py-1 rounded-md text-sm font-medium' id='//a[@data-quid="start-your-order-delivery-cta"]'>
        Delivery
    </div>
    <div class='hover:bg-gray-100 px-2 py-1 rounded-md text-sm font-medium' id='//a[@data-quid="start-your-order-carryout-cta"]'>
        Carryout
    </div>
</div>
```
