In [1]:
import google.generativeai as genai
import ast
import os
import json
import PIL.Image
from selenium.webdriver.common.by import By
from constants import *

# Load environment variables
from dotenv import load_dotenv
from selenium_functions import open_browser

load_dotenv()

# Parse GEMINI_API_KEYS from the environment. The value is evaluated with
# ast.literal_eval, so it must be written as a Python list literal of key strings.
GEMINI_API_KEYS = os.environ.get("GEMINI_API_KEYS")
if not GEMINI_API_KEYS:
    # Without this guard ast.literal_eval(None) fails with the opaque
    # "malformed node or string: None", which hides the real cause.
    raise ValueError(
        "GEMINI_API_KEYS is not set; define it in the environment or .env file "
        "as a Python list literal such as ['key-a', 'key-b']"
    )
KEY_LIST = ast.literal_eval(GEMINI_API_KEYS)
if not isinstance(KEY_LIST, list) or not KEY_LIST:
    # random.shuffle and the index-based rotation both need a non-empty list.
    raise ValueError("GEMINI_API_KEYS must evaluate to a non-empty list of API keys")

# Randomly shuffle the list of API keys so runs do not always start on the same one
import random
random.shuffle(KEY_LIST)

# Global cursor into KEY_LIST; cycle_api_key() advances it before each request
current_api_key_index = 0

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from lxml import html


def extract_elements_by_xpath(html_string, xpath_selector):
    """Return the XPath matches in ``html_string`` as a stringified list.

    Args:
        html_string: HTML markup to parse.
        xpath_selector: XPath expression evaluated against the parsed tree.

    Returns:
        ``str()`` of a list holding one entry per match, followed by
        ``xpath_selector`` itself as the final item. Element matches are
        serialized to their outer HTML; non-element results (attribute or
        text values, or the scalar produced by XPath functions such as
        ``count()``) are included via ``str()``.
    """
    # lxml is already a dependency of this notebook (``from lxml import html``).
    from lxml import etree

    # Parse the HTML and apply the XPath selector
    tree = html.fromstring(html_string)
    matches = tree.xpath(xpath_selector)

    # XPath functions (count(), string(), boolean()) yield a bare scalar instead
    # of a list; wrap it so a string result is not iterated character by character.
    if not isinstance(matches, list):
        matches = [matches]

    # html.tostring() only accepts element nodes and raises TypeError on the
    # string results produced by selectors such as //img/@src or //p/text().
    serialized = [
        html.tostring(match).decode("utf-8") if etree.iselement(match) else str(match)
        for match in matches
    ]
    return str(serialized + [xpath_selector])

In [3]:
def cycle_api_key():
    """Advance the shared key cursor by one slot and return the key it lands on.

    The cursor wraps back to index 0 once it has reached (or passed) the last
    entry of ``KEY_LIST``. The cursor moves *before* the lookup, so the key
    returned is the one at the new position, not the previous one.
    """
    global current_api_key_index
    last_index = len(KEY_LIST) - 1
    on_last_key = current_api_key_index >= last_index
    current_api_key_index = 0 if on_last_key else current_api_key_index + 1
    return KEY_LIST[current_api_key_index]


def generate_content_with_cycling_keys(prompt, system_prompt, image=None):
    """Send ``prompt`` to the Gemini model using the next API key in the rotation.

    Args:
        prompt: Text prompt for the model.
        system_prompt: Passed to the model as its system instruction.
        image: Optional image sent together with the prompt; when ``None``
            only the text prompt is sent.

    Returns:
        The ``text`` attribute of the model response.
    """
    # cycle_api_key() moves the rotation forward and returns the key to use now.
    genai.configure(api_key=cycle_api_key())

    generation_config = genai.GenerationConfig(max_output_tokens=8000, temperature=0)
    model = genai.GenerativeModel(
        "gemini-1.5-pro-latest",
        generation_config=generation_config,
        system_instruction=system_prompt,
    )

    # Text-only requests pass the bare prompt; multimodal ones pass [prompt, image].
    contents = prompt if image is None else [prompt, image]
    response = model.generate_content(contents, request_options={"timeout": 1000})
    return response.text

In [4]:
# Load the saved page (delivery.html) into a string. The encoding is given
# explicitly because open() otherwise falls back to a locale-dependent default.
with open('delivery.html', 'r', encoding='utf-8') as file:
    html_string = file.read()

user_prompt = f"""
current_page: {html_string}
Output selectors for relevant elements (divs, inputs, and images) that are relevant to the user's request.

user: I would like to order a pizza for delivery.
"""

# Generate content using the prompt and the website HTML
response = generate_content_with_cycling_keys(user_prompt, system_prompt_interpret)
print(response)
# The reply may contain other text around a ```json fenced block; keep only the
# fenced payload before handing it to the JSON parser.
if "```json" in response:
    response = response.split("```json")[1].split("```")[0]
obj = json.loads(response)

Plan:
- The user wants to order delivery, so I need to check if the delivery option is selected.
- If the delivery option is selected, I will return all of the input fields as well as the button to continue to find a store that delivers. 
- If the delivery option is not selected, I will select the delivery option and then return all of the input fields as well as the button to continue to find a store that delivers.

```json
[
  {
    "type": "xpath",
    "selector": "//button[@id='tab-Delivery']"
  },
  {
    "type": "xpath",
    "selector": "//select[@id='Type']"
  },
  {
    "type": "xpath",
    "selector": "//input[@id='Street']"
  },
  {
    "type": "xpath",
    "selector": "//input[@id='UnitNumber']"
  },
  {
    "type": "xpath",
    "selector": "//input[@id='PostalCode']"
  },
  {
    "type": "xpath",
    "selector": "//input[@id='City']"
  },
  {
    "type": "xpath",
    "selector": "//select[@id='Region']"
  },
  {
    "type": "xpath",
    "selector": "//button[@type='submit']

In [5]:
# Collect one newline-terminated fragment per selector entry, then join them once.
dom_fragments = []
for element in obj:
    if element["type"] != "xpath":
        # Non-XPath entries are recorded as a plain "src:" line.
        dom_fragments.append(f"src: {element['selector']}\n")
        continue
    dom_fragments.append(
        extract_elements_by_xpath(html_string, element["selector"]) + "\n"
    )
dom_elements = "".join(dom_fragments)

In [6]:
# Dump the concatenated selector matches for inspection.
print(dom_elements)

['<button tabindex="0" role="tab" type="button" class="css-rssjq3" id="tab-Delivery" aria-selected="true" data-index="0" aria-labelledby="tab-Delivery-label" aria-describedby="tab-Delivery-description" data-quid="Delivery-input"><span><span class="css-n1yrf8"><svg aria-hidden="true" focusable="false" xmlns="http://www.w3.org/2000/svg" viewbox="0 0 35 35" class="css-14nm7ao"><g><path d="M9.27,21.03c0,1.21-.98,2.19-2.19,2.19s-2.19-.98-2.19-2.19,.98-2.19,2.19-2.19,2.19,.98,2.19,2.19"></path><path d="M30.29,21.03c0,1.21-.98,2.19-2.19,2.19s-2.19-.98-2.19-2.19,.98-2.19,2.19-2.19,2.19,.98,2.19,2.19"></path><path d="M32.98,33.18h-3.03c-1.02,0-1.85-.83-1.85-1.85v-1.09c0-.06-.05-.11-.11-.11H7.3c-.06,0-.11,.05-.11,.11v1.09c0,1.02-.83,1.85-1.85,1.85H2.32c-1.02,0-1.85-.83-1.85-1.85v-11.06c0-.68,.24-1.65,.55-2.25l1.58-3.01h-.75c-1.02,0-1.85-.83-1.85-1.85v-.93c0-1.02,.83-1.85,1.85-1.85h2.72c.21,0,.56-.21,.66-.4l2.41-4.59c.4-.76,1.35-1.33,2.2-1.33h15.61c.86,0,1.8,.57,2.2,1.33l2.41,4.59c.1,.19,.44,.4,.

In [7]:
# NOTE(review): `design` is opened here but is not passed to the model call
# below (no image argument is supplied) — confirm whether it was meant to be sent.
design = PIL.Image.open("design.png")

# Prompt = design schema, extracted DOM fragments, then the output instructions,
# separated by blank lines.
# NOTE(review): the instruction literal starts with an unmatched double quote;
# it is reproduced verbatim here — confirm whether it is intentional.
generation_prompt = "\n\n".join(
    [
        design_schema,
        dom_elements,
        '"Only output div, button, input, img, and select elements. Do not use Tailwind Classes\nBase Url for images (if any): https://dominos.com \n\n',
    ]
)
response = generate_content_with_cycling_keys(generation_prompt, system_prompt_generate)

In [8]:
# Show the model's reply to the generation request.
print(response)

```html
<div class='container bg-[#DEDEDE]'>
    <button class='primary button text-white font-bold py-2 px-4 rounded-[0.5rem] hover:bg-[#8200D1]' special-id='//button[@id='tab-Delivery']'>
        Delivery
    </button>
    <select class='border-[3px] border-[#DEDEDE] bg-white rounded-[0.5rem] px-3 py-2' special-id='//select[@id='Type']'>
        <option value="House">House</option>
        <option value="Apartment">Apartment</option>
        <option value="Business">Business</option>
        <option value="Campus">Campus/Base</option>
        <option value="Hotel">Hotel</option>
        <option value="Other">Other</option>
    </select>
    <input class='border-[3px] border-[#DEDEDE] bg-white rounded-[0.5rem] px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='Street']'>
    <input class='border-[3px] border-[#DEDEDE] bg-white rounded-[0.5rem] px-3 py-2 text-[#A5A5A5]' special-id='//input[@id='UnitNumber']'>
    <input class='border-[3px] border-[#DEDEDE] bg-white rounded-[0.5rem] px-