In [None]:
import openai
import speech_recognition as sr
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
import time
from dotenv import load_dotenv
import os
import json

# Load environment variables (OPENAI_API_KEY, OPENAI_MODEL) from a local .env file
load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")
# Use the same fallback model as the model sanity-check cell so that an unset
# OPENAI_MODEL does not turn into `model=None` on every chat request.
model_name = os.getenv("OPENAI_MODEL", "gpt-4-turbo")
client = openai.OpenAI()

# Setup speech recognition
r = sr.Recognizer()

# Setup Selenium service (but don't start Firefox yet)
service = Service(GeckoDriverManager().install())
driver = None  # Lazy-load driver when needed; created by the first "open" command

# True while voice commands are forwarded to GPT; toggled by "stop/continue listening"
listening = True

def scrape_page_elements():
    """Summarise the current page as a list of short descriptive strings.

    Reads the module-level ``driver`` and collects, in this order:
    links (``<a>`` with both visible text and an ``href``), buttons
    (``<button>`` with visible text) and headers (``<h1>`` then ``<h2>``
    with visible text).

    Returns:
        list[str]: entries such as ``"Link: 'Docs' -> https://..."``,
        ``"Button: 'Submit'"`` or ``"Header: 'Title'"``. If anything raises
        while scraping, the error is printed and whatever was gathered up to
        that point is returned.
    """
    found = []
    try:
        # Links: both the visible label and the target must be non-empty.
        for anchor in driver.find_elements(By.TAG_NAME, "a"):
            label = anchor.text.strip()
            target = anchor.get_attribute('href')
            if not (label and target):
                continue
            found.append(f"Link: '{label}' -> {target}")

        # Buttons: only those that show some text.
        for btn in driver.find_elements(By.TAG_NAME, "button"):
            label = btn.text.strip()
            if label:
                found.append(f"Button: '{label}'")

        # Headers: look up both levels first, then walk h1s followed by h2s.
        top_level = driver.find_elements(By.TAG_NAME, "h1")
        second_level = driver.find_elements(By.TAG_NAME, "h2")
        for heading in top_level + second_level:
            label = heading.text.strip()
            if label:
                found.append(f"Header: '{label}'")

    except Exception as e:
        print("Error scraping page:", e)

    return found

def ask_gpt_for_action(command_text):
    """Ask the chat model which browser action matches a spoken command.

    The system prompt lists up to the first 20 entries returned by
    ``scrape_page_elements()`` (nothing when no browser is open) and tells the
    model to answer with a single JSON action.

    Args:
        command_text: the recognised voice command.

    Returns:
        The content of the first choice in the chat completion, i.e. the
        model's raw reply (expected to be a JSON string).
    """
    # Only scrape when a browser session exists.
    if driver:
        visible_items = scrape_page_elements()
    else:
        visible_items = []
    # Cap the context at 20 entries to keep the prompt short.
    page_summary = "\n".join(visible_items[:20])

    instructions = f"""
You are a smart browser assistant.

Here are the elements currently on the page:
{page_summary}

ONLY respond in strict JSON.
You can:
- Open a URL
- Scroll
- Close the current tab
- Switch to another tab
- Click a link (ONLY if it appears above by exact text)

Example:
{{"action":"open","url":"https://github.com","new_tab":true}}
{{"action":"scroll","direction":"down","amount":500}}
{{"action":"close_tab"}}
{{"action":"switch_tab","tab_index":1}}
{{"action":"click","link_text":"All USCIS News"}}

Never invent links or buttons!
"""

    chat_messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": f"My command: {command_text}"},
    ]
    completion = client.chat.completions.create(
        model=model_name,
        messages=chat_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def _parse_ai_command(raw_reply):
    """Turn the model's raw reply into a command dict.

    Strict JSON is tried first so that apostrophes inside values (e.g. a link
    called "What's New") survive. Only if that fails is the legacy repair of
    swapping single quotes for double quotes attempted.

    Raises:
        json.JSONDecodeError: the reply is not JSON even after the repair.
        ValueError: the reply is valid JSON but not an object.
    """
    text = raw_reply.strip()
    # Chat models frequently wrap JSON in a Markdown code fence (```json ... ```).
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:].strip()
    try:
        command = json.loads(text)
    except json.JSONDecodeError:
        command = json.loads(text.replace("'", '"'))
    if not isinstance(command, dict):
        raise ValueError(f"expected a JSON object, got {type(command).__name__}")
    return command


def handle_ai_command(command_json):
    """Execute the browser action described by the model's JSON reply.

    Supported actions (key ``action``):
      * ``open``       - ``url`` (required), ``new_tab`` (optional bool). Starts
                         Firefox on first use; a freshly started browser loads
                         the URL in its initial tab instead of leaving a blank one.
      * ``scroll``     - ``direction`` ("down"/"up", default "down"),
                         ``amount`` in pixels (default 500).
      * ``close_tab``  - closes the current tab; closing the last one quits the
                         browser and resets the module-level ``driver`` to None.
      * ``switch_tab`` - ``tab_index`` (0-based, default 0).
      * ``click``      - ``link_text``; clicks the first ``<a>`` whose visible
                         text matches, ignoring case and surrounding whitespace.

    Args:
        command_json: raw reply string from the model.

    Nothing is returned and nothing is raised: every failure is reported on
    stdout so the voice loop keeps running.
    """
    global driver
    try:
        command = _parse_ai_command(command_json)
        action = command.get('action')

        if action == "open":
            url = command['url']  # validate before paying for a browser launch
            just_launched = driver is None
            if just_launched:
                driver = webdriver.Firefox(service=service)

            if command.get('new_tab', False) and not just_launched:
                # Pass the URL as a script argument rather than splicing it into
                # the JavaScript source, so quotes in the URL cannot break it.
                driver.execute_script("window.open(arguments[0], '_blank');", url)
                driver.switch_to.window(driver.window_handles[-1])  # Focus on new tab
            else:
                driver.get(url)

        elif action == "switch_tab":
            tab_index = int(command.get('tab_index', 0))
            if driver and 0 <= tab_index < len(driver.window_handles):
                driver.switch_to.window(driver.window_handles[tab_index])
                print(f"✅ Switched to tab {tab_index + 1}")
            else:
                print("❌ Invalid tab index or no browser open.")

        elif action in ("scroll", "close_tab", "click") and driver is None:
            print("❌ No browser open. Say 'open <site>' first.")

        elif action == "scroll":
            amount = int(command.get('amount', 500))  # tolerate "500" as well as 500
            direction = command.get('direction', 'down')
            if direction == "down":
                driver.execute_script("window.scrollBy(0, arguments[0]);", amount)
            elif direction == "up":
                driver.execute_script("window.scrollBy(0, arguments[0]);", -amount)
            else:
                print(f"❌ Unknown scroll direction: {direction}")

        elif action == "close_tab":
            # Closing the last window ends the WebDriver session, after which
            # window_handles can no longer be queried. Decide up front and shut
            # the browser down cleanly in that case.
            if len(driver.window_handles) <= 1:
                driver.quit()
                driver = None  # No windows left
            else:
                driver.close()
                driver.switch_to.window(driver.window_handles[-1])

        elif action == "click":
            link_text = command['link_text'].strip().lower()
            for link in driver.find_elements(By.TAG_NAME, "a"):
                # The page summary shown to the model is stripped, so compare
                # stripped text here as well.
                if link.text.strip().lower() == link_text:
                    link.click()
                    print(f"✅ Clicked link: {link_text}")
                    break
            else:
                print(f"❌ Link '{link_text}' not found.")

        else:
            print("❓ Unknown action:", command)

    except Exception as e:
        # Deliberately broad: a bad reply or a Selenium hiccup must not end the
        # voice session.
        print("❌ Failed to handle AI command:", e)
        print("Command content was:", command_json)

# Main listening loop
def _listen_for_text(source, phrase_time_limit=None):
    """Record one phrase from `source` and return it lower-cased and stripped.

    Returns None (after printing a short notice) when the speech could not be
    understood or the recognition service request failed.
    """
    try:
        audio = r.listen(source, phrase_time_limit=phrase_time_limit)
        return r.recognize_google(audio).lower().strip()
    except sr.UnknownValueError:
        print("🤷 Didn't catch that.")
    except sr.RequestError:
        print("🚫 Speech service error.")
    return None


with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)
    print("🎤 Ready for AI commands!")

    try:
        while True:
            if listening:
                print("🎧 Listening...")
                text = _listen_for_text(source)
                if text is not None:
                    print(f"🗣️ Heard: {text}")

                    if "stop listening" in text:
                        listening = False
                        print("🛑 Listening paused.")
                    else:
                        try:
                            ai_response = ask_gpt_for_action(text)
                        except openai.APIError as e:
                            # A network/API failure should cost one command,
                            # not the whole voice session.
                            print("❌ GPT request failed:", e)
                        else:
                            print("🤖 GPT Response:", ai_response)
                            handle_ai_command(ai_response)

            else:
                print("⏸️ Listening paused. Say 'continue listening' to resume.")
                # Short phrase limit: while paused only the wake phrase matters.
                text = _listen_for_text(source, phrase_time_limit=4)
                if text is not None:
                    print(f"🗣️ Heard while paused: {text}")

                    if "continue listening" in text or "start listening" in text:
                        listening = True
                        print("▶️ Listening resumed.")

            time.sleep(0.5)

    except KeyboardInterrupt:
        # Interrupting the kernel is the only way out of the loop.
        print("👋 Stopped by user.")
    finally:
        # Do not leave a Firefox/geckodriver session behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print("Error closing browser:", e)
            driver = None


🎤 Ready for AI commands!
🎧 Listening...
🗣️ Heard: open google
🤖 GPT Response: {"action":"open","url":"https://www.google.com","new_tab":true}
🎧 Listening...
🗣️ Heard: navigate to the latest news art
🤖 GPT Response: {"action":"click","link_text":"About"}
✅ Clicked link: about
🎧 Listening...
🗣️ Heard: no i want to go to the latest news
🤖 GPT Response: {"action":"click","link_text":"News"}
✅ Clicked link: news
🎧 Listening...
🗣️ Heard: political news
🤖 GPT Response: {"action":"click","link_text":"News"}
✅ Clicked link: news
🎧 Listening...


In [5]:
import openai
from dotenv import load_dotenv
import os

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")
client = openai.OpenAI()

# Model to test; falls back to gpt-4-turbo when OPENAI_MODEL is not set.
model_name = os.getenv("OPENAI_MODEL", "gpt-4-turbo")

# Smoke test: send one trivial chat request to confirm that the API key and
# the configured model name are usable.
try:
    print(f"🧪 Testing model: {model_name}...")
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "Say hello"}],
    )
    print(f"✅ Model '{model_name}' responded:")
    print(response.choices[0].message.content)
except Exception as e:
    print(f"❌ Error testing model '{model_name}':")
    print(e)
    # Re-raise rather than calling exit(1): exit() is the interactive-shell
    # helper and is not meant for program code; re-raising fails this cell
    # with the original traceback intact.
    raise


🧪 Testing model: gpt-4.1-mini...
✅ Model 'gpt-4.1-mini' responded:
Hello! How can I help you today?
