# Interaction with WhatsApp via Selenium

In [1]:
%pip install selenium --upgrade --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Print the installed selenium package metadata (version, location, dependencies).
%pip show selenium

Name: selenium
Version: 4.27.1
Summary: Official Python bindings for Selenium WebDriver
Home-page: https://www.selenium.dev
Author: 
Author-email: 
License: Apache 2.0
Location: /Users/roman/Library/Caches/pypoetry/virtualenvs/whatsapp-selenium-integration-yfynyyj5-py3.10/lib/python3.10/site-packages
Requires: certifi, trio, trio-websocket, typing_extensions, urllib3, websocket-client
Required-by: 
Note: you may need to restart the kernel to use updated packages.


### Wrapper class for interacting with WhatsApp chat

In [7]:
import logging
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import asyncio
from typing import List, Dict, Optional

# Configure the root logger at INFO level so logging.info() progress messages are displayed.
logging.basicConfig(level=logging.INFO)

In [8]:
class ClientWhatsAppSelenium:
    """
    A class to interact with a WhatsApp Web chat using Selenium.
    It allows for opening a chat, sending messages, and retrieving the latest incoming message.

    The DOM locations used to drive the page are exposed as class attributes
    (``*_XPATH``) so they can be overridden in a subclass or on an instance
    without editing the methods.

    The instance can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`, which quits the browser.

    Methods:
        __init__: Initializes a Chrome session, opens WhatsApp Web, prompts for QR code scanning, and opens a specified chat.
        close: Quits the browser session.
        wait: Pauses execution for a given number of seconds, optionally doubling the wait time if slow mode is enabled.
        open_chat: Searches and opens a WhatsApp chat by its name or ID.
        get_last_assistant_message: Retrieves the last incoming (assistant) message from the currently open chat.
        is_user_last_message_sender: Determines whether the newest message in the chat was sent by the user or the assistant.
        send_message: Sends a specified text message to the currently open chat.
        interact: Sends a message and polls for an assistant response, returning the assistant's reply if available.
    """

    # Page address and Chrome profile directory used by __init__.
    WHATSAPP_WEB_URL = "https://web.whatsapp.com"
    USER_DATA_DIR_ARGUMENT = "--user-data-dir=./chrome-data"

    # Absolute XPaths into the WhatsApp Web page.
    SEARCH_BOX_XPATH = "/html/body/div[1]/div/div/div[3]/div[3]/div/div/div/div[2]/div[2]/div/div/p"
    FIRST_RESULT_XPATH = "/html/body/div[1]/div/div/div[3]/div[3]/div/div[3]/div/div/div/div/div/div/div/div[2]"
    MESSAGE_LIST_XPATH = "/html/body/div[1]/div/div/div[3]/div[4]/div/div[3]/div/div[2]/div[3]"
    MESSAGE_INPUT_XPATH = (
        "/html/body/div[1]/div/div/div[3]/div[4]/div/footer/div/div/span/div/div[2]/div/div[2]/div/p"
    )

    # XPaths relative to the message list / a single message row.
    MESSAGE_ROW_XPATH = "./div[@role='row']"
    MESSAGE_ID_XPATH = ".//div[@data-id]"
    MESSAGE_TEXT_XPATH = ".//span[contains(@class,'selectable-text') and contains(@class,'copyable-text')]"

    # A `data-id` starting with this prefix marks an incoming (assistant) message.
    INCOMING_ID_PREFIX = "false_"

    def __init__(self, chat_name: str, slow_mode: bool = False):
        """
        Initialize the Chrome browser session and open WhatsApp Web.

        This method:
        - Sets up a Selenium-driven Chrome browser instance with user data.
        - Opens WhatsApp Web.
        - Waits for the user to scan the WhatsApp QR code (blocks on ``input()``).
        - Opens the specified chat by its name or ID.

        If any step after the browser has been started fails (including the user
        interrupting the QR prompt), the browser is quit before the error is
        re-raised, because the caller never receives the instance and therefore
        could not close it.

        :param chat_name: The name or ID of the WhatsApp chat to open.
        :param slow_mode: If True, slows down certain operations by increasing wait times.
        """
        self.chat_name = chat_name
        # Not used by any method of this class; kept because it is a public attribute.
        self.loop = asyncio.get_event_loop()
        self.slow_mode = slow_mode

        chrome_options = Options()
        chrome_options.add_argument(self.USER_DATA_DIR_ARGUMENT)
        self.driver = webdriver.Chrome(options=chrome_options)

        try:
            self.driver.get(self.WHATSAPP_WEB_URL)
            input("Scan the QR code in WhatsApp Web and press Enter...")
            self.open_chat(self.chat_name)
        except BaseException:
            # BaseException on purpose: interrupting the input() prompt raises
            # KeyboardInterrupt, and the browser must not be orphaned then either.
            self.driver.quit()
            raise

    def __enter__(self):
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser when the ``with`` block ends; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """
        Quit the browser session held in ``self.driver``.
        """
        self.driver.quit()

    def wait(self, seconds: float):
        """
        Pause the program execution for a certain number of seconds.

        If slow_mode is enabled, the waiting time is doubled. This is used to ensure that
        the web elements have enough time to load and become interactable.

        :param seconds: The base number of seconds to wait.
        """
        time.sleep(seconds * 2 if self.slow_mode else seconds)

    def open_chat(self, chat_name: str):
        """
        Open a specified WhatsApp chat by name or ID.

        This method:
        - Locates the WhatsApp search box.
        - Inputs the chat identifier and waits for results.
        - Clicks the element at ``FIRST_RESULT_XPATH``.
        - Waits briefly to ensure the chat interface loads.

        :param chat_name: The name or ID of the chat to open.
        """
        logging.info("Opening chat")
        search_box = self.driver.find_element(By.XPATH, self.SEARCH_BOX_XPATH)
        search_box.clear()
        search_box.send_keys(chat_name)
        self.wait(2)

        chat = self.driver.find_element(By.XPATH, self.FIRST_RESULT_XPATH)
        chat.click()
        self.wait(2)

    def _iter_message_elements(self):
        """
        Yield the message elements of the open chat, newest first.

        For every row of the message list (walked bottom-up) the first
        descendant carrying a ``data-id`` attribute is yielded; rows without
        such a descendant are skipped. Lookups happen lazily while iterating,
        so callers should iterate inside their own error handling.
        """
        container = self.driver.find_element(By.XPATH, self.MESSAGE_LIST_XPATH)
        rows = container.find_elements(By.XPATH, self.MESSAGE_ROW_XPATH)
        for row in reversed(rows):
            candidates = row.find_elements(By.XPATH, self.MESSAGE_ID_XPATH)
            if candidates:
                yield candidates[0]

    def get_last_assistant_message(self) -> Optional[str]:
        """
        Retrieve the last incoming message from the open chat.

        Messages are scanned newest first. The first one whose ``data-id``
        starts with "false_" (an incoming message, e.g. from the assistant) and
        that contains a text element is used; its text is returned stripped of
        surrounding whitespace. Incoming messages without a text element are
        skipped in favour of older ones.

        Any error raised while reading the page is logged and turned into None.

        :return: The text of the last assistant message or None if no incoming messages are found.
        """
        try:
            for message in self._iter_message_elements():
                data_id = message.get_attribute("data-id")
                if not data_id.startswith(self.INCOMING_ID_PREFIX):
                    continue
                text_elems = message.find_elements(By.XPATH, self.MESSAGE_TEXT_XPATH)
                if text_elems:
                    return text_elems[-1].text.strip()
            return None
        except Exception as e:
            logging.error("Error getting the last message: %s", e)
            return None

    def is_user_last_message_sender(self) -> Optional[bool]:
        """
        Determine whether the newest message in the current chat was sent by the user or the assistant.

        Only the newest message element is inspected:
        - If its `data-id` starts with "false_", it is an incoming (assistant) message.
        - Otherwise, it is considered a user-sent message.

        Any error raised while reading the page is logged and turned into None.

        :return: True if the last message was sent by the user, False if sent by the assistant, or None if no messages are found.
        """
        try:
            newest = next(self._iter_message_elements(), None)
            if newest is None:
                return None
            return not newest.get_attribute("data-id").startswith(self.INCOMING_ID_PREFIX)
        except Exception as e:
            logging.error("Error getting the last message: %s", e)
            return None

    def send_message(self, text: str):
        """
        Send a text message to the currently open chat.

        This method inputs the specified text into the chat's message box and
        simulates pressing Enter to send the message. Newlines in the text are
        replaced by spaces before typing.

        :param text: The text content of the message to be sent.
        """
        text = text.replace("\n", " ")
        input_box = self.driver.find_element(By.XPATH, self.MESSAGE_INPUT_XPATH)
        input_box.clear()
        input_box.send_keys(text)
        input_box.send_keys(Keys.ENTER)

    def interact(
        self,
        history: List[Dict[str, str]],
        messages: List[Dict[str, str]],
        retry_interval: int = 5,
        max_attempts: int = 4,
    ) -> Dict[str, str]:
        """
        Facilitate a conversation flow by sending a message and waiting for a response.

        This method:
        - Extends ``history`` in place with ``messages``.
        - Sends the content of the last entry of ``messages``.
        - Waits ``retry_interval`` seconds, then checks for a new incoming assistant
          message; this is repeated at most ``max_attempts`` times.
        - Once an assistant message is detected, it returns that message.

        :param history: A list of dictionaries representing the conversation history, where each dict has 'role' and 'content'. Mutated in place.
        :param messages: A list of new messages (same format as history); only the last one is sent to the chat.
        :param retry_interval: The number of seconds to wait between checks for a new assistant message.
        :param max_attempts: How many times to check for a reply before giving up.
        :return: A dictionary with 'role' set to 'assistant' and 'content' containing the assistant's reply;
                 'content' is an empty string if no reply was found within the allowed attempts.
        :raises Exception: Any error raised while sending or polling is logged and re-raised.
        """
        logging.info("Starting interaction")
        history += messages
        try:
            outgoing_text = messages[-1]["content"]
            self.send_message(outgoing_text)

            assistant_msg = None
            for _ in range(max_attempts):
                self.wait(retry_interval)
                # Only read the reply once the newest message is an incoming one;
                # otherwise an older assistant message would be returned.
                if self.is_user_last_message_sender() is False:
                    assistant_msg = self.get_last_assistant_message()
                    if assistant_msg:
                        break

            return {"role": "assistant", "content": assistant_msg if assistant_msg else ""}
        except Exception as e:
            logging.error("Error during interaction: %s", e)
            raise

## Example

In [9]:
# Placeholder: replace with the name or ID of the WhatsApp chat to open.
chat_name = "chat_name"

In [10]:
# Starts Chrome, opens WhatsApp Web, blocks on the QR-code prompt (press Enter
# once scanned), then opens the chat named above.
test_client = ClientWhatsAppSelenium(chat_name, slow_mode=False)

INFO:root:Opening chat


In [11]:
# Sends the last message in `messages` and polls the chat for an incoming reply;
# the returned dict has role "assistant" and the reply text (or "") as content.
test_client.interact(history=[], messages=[{"role": "user", "content": "My message"}])

INFO:root:Starting interaction


{'role': 'assistant', 'content': 'Assistant message'}