In [1]:
import pickle
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:

# File path to store cookies
cookie_file_path = "cookies.pkl"

# Chrome options for headless mode and resource blocking
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})

# Initialize the Chrome WebDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)

# Function to save cookies after accepting them
def save_cookies(driver, path):
    with open(path, "wb") as file:
        pickle.dump(driver.get_cookies(), file)
    print("Cookies saved.")

# Function to load cookies
def load_cookies(driver, path):
    try:
        with open(path, "rb") as file:
            cookies = pickle.load(file)
            for cookie in cookies:
                driver.add_cookie(cookie)
        print("Cookies loaded.")
    except FileNotFoundError:
        print("Cookie file not found. Proceeding without loading cookies.")



In [3]:
driver.get("https://finance.yahoo.com/quote/GC%3DF/")
time.sleep(3)  # Wait for page to load fully
cookies_loaded = False

In [4]:
# Step 1: Open the page and check if cookies file exists
try:
    # Load cookies if they exist
    load_cookies(driver, cookie_file_path)
    cookies_loaded = True  # Cookies were successfully loaded
    print("Cookies were loaded, skipping cookie acceptance.")
    driver.refresh()  # Refresh the page to apply loaded cookies
except Exception as e:
    print("Error loading cookies:", e)
    cookies_loaded = False


Error loading cookies: Message: invalid cookie domain: Cookie 'domain' mismatch
  (Session info: chrome=130.0.6723.92)
Stacktrace:
0   chromedriver                        0x000000010061f648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x0000000100617ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000100084104 cxxbridge1$string$len + 88416
3   chromedriver                        0x000000010010ea64 cxxbridge1$string$len + 656064
4   chromedriver                        0x00000001000ffbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x00000001000baf54 cxxbridge1$string$len + 313264
6   chromedriver                        0x00000001000bbba4 cxxbridge1$string$len + 316416
7   chromedriver                        0x00000001005ea1e8 cxxbridge1$str$ptr + 3427196
8   chromedriver                        0x00000001005ed52c cxxbridge1$str$ptr + 3440320
9   chromedriver                        0x00000001005d160c cxxbridge

In [5]:
# Step 2: If no cookies were loaded (e.g., first visit), accept cookies manually and save them
if not cookies_loaded: 
    try:
        accept_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept all')]"))
        )
        accept_button.click()
        print("Clicked on the cookie acceptance button.")
        time.sleep(2)  # Wait for cookies to be set
        save_cookies(driver, cookie_file_path)  # Save cookies for future use
    except Exception as e:
        print("Cookie acceptance button not found or already accepted:", e)


Clicked on the cookie acceptance button.
Cookies saved.


In [6]:
# Proceed with scraping after handling cookies
print(driver.page_source[0:500])  # Example: print the first 500 characters of the page source

# Step 3: Simulate hover to trigger dynamic content
try:
    # Locate the element to hover over
    element_to_hover = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, ".chartContainer"))  # Replace with  chartContainer stx-crosshair-cursor-on
    )
    
    # Perform hover action
    actions = ActionChains(driver)
    actions.move_to_element(element_to_hover).perform()
    print("Hovered over the element to trigger dynamic content.")
    
    # Wait a moment to allow dynamic content to load
    time.sleep(2)
    
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    # Extract tooltip values
    results = soup.find_all('table', attrs={'class': 'hu-tooltip'})
    all_values = []  # List to store all extracted values
    if results:
        first_result = results[0]
        values = [td.text.strip() for td in first_result.find_all('td', class_='hu-tooltip-value')]
        all_values.extend(values)  # Store the current extracted values
        print("Extracted values:", values)
    else:
        print("No tooltip tables found.")

    # Print the last 10 values if available
    last_10_values = all_values[-10:] if len(all_values) >= 10 else all_values
    print("Last 10 values:", last_10_values)
        
except Exception as e:
    print("Error during hover simulation or data extraction:", e)

# Close the driver when done
driver.quit()

<html lang="en-US" theme="light" data-color-scheme="light" class="desktop neo-green dock-upscale" ciq-last-interaction=""><head>
        <meta charset="utf-8">
        <meta name="oath:guce:consent-host" content="guce.yahoo.com">
        <link rel="preconnect" href="//s.yimg.com" crossorigin="anonymous"><link rel="preconnect" href="//geo.yahoo.com"><link rel="preconnect" href="//query1.finance.yahoo.com"><link rel="preconnect" href="//query2.finance.yahoo.com"><link rel="preconnect" href="//cons
Hovered over the element to trigger dynamic content.
Extracted values: ['11/1 07:08', '2,762.00', '2,760.80', '2,762.10', '2,760.60', '217']
Last 10 values: ['11/1 07:08', '2,762.00', '2,760.80', '2,762.10', '2,760.60', '217']
