In [1]:
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Start a Chrome session and maximize its window.
driver = webdriver.Chrome()
driver.maximize_window()


# Shared explicit wait, created with a 5-second timeout; it is reused by
# every `wait.until(...)` call further down.
wait = WebDriverWait(driver, timeout=5)


def wait_for_page_to_load(driver, wait):
    """Wait until the current document reports ``readyState == "complete"``.

    Parameters
    ----------
    driver
        Object whose ``title`` attribute is read for the status message.
    wait
        Object with an ``until(condition)`` method (a ``WebDriverWait`` in this
        notebook). The condition is called with the driver the wait was built
        around and runs ``return document.readyState`` in the page.

    Returns
    -------
    None
        The outcome is only reported by printing a message.

    Notes
    -----
    Only ``TimeoutException`` is treated as "did not load in time"; any other
    error (or a keyboard interrupt) propagates to the caller instead of being
    reported as a slow page.
    """
    try:
        wait.until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

    except TimeoutException:
        # The title is read *after* waiting so the message names the page the
        # browser is on now, not the one it was on when the call started.
        print(f"The page \"{driver.title}\" did not get fully loaded within the given duration.\n")

    else:
        print(f"The page \"{driver.title}\" is successfully loaded.\n")


# Open the Yahoo Finance home page and wait for the document to finish loading.
url = "https://finance.yahoo.com/"
driver.get(url)
wait_for_page_to_load(driver, wait)


# Step 1: hover over the Markets entry in the header.
# NOTE: the locators below are absolute XPaths; they break whenever the page
# layout changes.
markets_menu_xpath = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/a[1]/span[1]'
actions = ActionChains(driver)
markets_menu = wait.until(
    EC.presence_of_element_located((By.XPATH, markets_menu_xpath))
)
actions.move_to_element(markets_menu).perform()


# Step 2: once the Trending Tickers entry is clickable, follow it.
trending_tickers_xpath = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/div[1]/ul[1]/li[4]/a[1]/div[1]'
trending_tickers = wait.until(
    EC.element_to_be_clickable((By.XPATH, trending_tickers_xpath))
)
trending_tickers.click()
wait_for_page_to_load(driver, wait)


# Step 3: switch to the Most Active tab in the page's navigation list.
most_active_xpath = '/html[1]/body[1]/div[2]/main[1]/section[1]/section[1]/section[1]/article[1]/section[1]/div[1]/nav[1]/ul[1]/li[1]/a[1]/span[1]'
most_active = wait.until(
    EC.element_to_be_clickable((By.XPATH, most_active_xpath))
)
most_active.click()
wait_for_page_to_load(driver, wait)


# Scraping The Data
# Walk through every page of the table, collecting one dict per row. The
# browser is closed in a `finally` so it does not stay open when scraping
# fails part-way through.
data = []

# The highest cell index read below is 9, so a usable row needs 10 <td> cells.
MIN_CELLS_PER_ROW = 10

try:
    while True:
        # Scraping
        wait.until(
            EC.presence_of_all_elements_located((By.TAG_NAME, "table"))
        )
        rows = driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

        for row in rows:
            values = row.find_elements(By.TAG_NAME, "td")
            if len(values) < MIN_CELLS_PER_ROW:
                # Indexing a shorter row would raise IndexError and lose
                # everything scraped so far; skip it instead.
                continue

            # Cells 2 and 5 are intentionally not read.
            stock = {
                "Name": values[1].text,
                "Symbol": values[0].text,
                "Price": values[3].text,
                "Change": values[4].text,
                "Volume": values[6].text,
                "Avg_Vol_3M": values[7].text,
                "Market_Cap": values[8].text,
                "PE_Ratio_TTM": values[9].text
            }
            data.append(stock)

        # Clicking Next
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="nimbus-app"]/section/section/section/article/section[1]/div/div[3]/div[3]/button[3]'))
            )
        except TimeoutException:
            # Only a timeout means "no further page"; any other error now
            # propagates instead of silently ending the scrape.
            print("The Next Button is not clickable. We have navigated through all the pages.\n")
            break
        else:
            next_button.click()
            # Fixed pause before reading the table again.
            # NOTE(review): a wait on the table content changing would be more
            # reliable than a sleep -- confirm against the live page.
            time.sleep(1)
finally:
    driver.quit()

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Top Trending Stocks: US stocks with the highest interest today - Yahoo Finance" is successfully loaded.

The Next Button is not clickable. We have navigated through all the pages.



In [3]:
# Preview the first few scraped records rather than echoing the whole list.
data[:5]

[{'Name': 'NVIDIA Corporation',
  'Symbol': 'NVDA',
  'Price': '111.01',
  'Change': '+4.58',
  'Volume': '248.902M',
  'Avg_Vol_3M': '313.93M',
  'Market_Cap': '2.709T',
  'PE_Ratio_TTM': '37.76'},
 {'Name': 'Tesla, Inc.',
  'Symbol': 'TSLA',
  'Price': '284.95',
  'Change': '+25.44',
  'Volume': '166.129M',
  'Avg_Vol_3M': '114.692M',
  'Market_Cap': '917.813B',
  'PE_Ratio_TTM': '163.76'},
 {'Name': 'Intel Corporation',
  'Symbol': 'INTC',
  'Price': '20.05',
  'Change': '-1.44',
  'Volume': '147.014M',
  'Avg_Vol_3M': '113.823M',
  'Market_Cap': '93.709B',
  'PE_Ratio_TTM': '--'},
 {'Name': 'Palantir Technologies Inc.',
  'Symbol': 'PLTR',
  'Price': '112.78',
  'Change': '+5.00',
  'Volume': '102.567M',
  'Avg_Vol_3M': '110.295M',
  'Market_Cap': '253.637B',
  'PE_Ratio_TTM': '593.58'},
 {'Name': 'Ford Motor Company',
  'Symbol': 'F',
  'Price': '10.04',
  'Change': '-0.02',
  'Volume': '84.94M',
  'Avg_Vol_3M': '125.052M',
  'Market_Cap': '40.004B',
  'PE_Ratio_TTM': '6.88'},
 {'

In [4]:
# Total number of row records collected across all scraped pages.
len(data)

249

In [5]:
# Multipliers for the magnitude suffixes that appear in the scraped figures
# (e.g. "248.902M", "2.709T").
_MAGNITUDES = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}


def _parse_number(col):
    """Convert plain numeric text (e.g. "+4.58", "1,234.56") to floats.

    Thousands separators and a leading "+" are removed first. Text that is
    still not a number afterwards, such as the "--" placeholder, becomes NaN.
    """
    cleaned = (
        col
        .str.strip()
        # regex=False: "+" and "," are literal characters here, independent of
        # the pandas version's default for `regex`.
        .str.replace(",", "", regex=False)
        .str.replace("+", "", regex=False)
    )
    return pd.to_numeric(cleaned, errors="coerce")


def _parse_abbreviated(col, unit):
    """Convert suffixed figures (e.g. "84.94M", "2.709T") to floats in `unit`.

    `unit` must be a key of `_MAGNITUDES` ("K", "M", "B" or "T"). A value with
    no suffix is taken as a raw count. Text that does not look like a number
    (for example "--") becomes NaN.
    """
    parts = (
        col
        .str.strip()
        .str.replace(",", "", regex=False)
        .str.extract(r"^([+-]?\d+(?:\.\d+)?)([KMBT]?)$")
    )
    magnitude = pd.to_numeric(parts[0], errors="coerce")
    # Divide the multipliers before scaling so that a value already in the
    # target unit is multiplied by exactly 1.0 and comes back as scraped.
    factor = (
        parts[1].map(_MAGNITUDES).astype(float).fillna(1.0) / _MAGNITUDES[unit]
    )
    return magnitude * factor


stocks_df = (
    pd
    .DataFrame(data)
    .apply(lambda col: col.str.strip() if col.dtype == "object" else col)
    .assign(
        Price = lambda df_: _parse_number(df_.Price),
        Change = lambda df_: _parse_number(df_.Change),
        Volume = lambda df_: _parse_abbreviated(df_.Volume, unit="M"),
        Market_Cap = lambda df_: _parse_abbreviated(df_.Market_Cap, unit="B"),
        PE_Ratio_TTM = lambda df_: _parse_number(df_.PE_Ratio_TTM),
        Avg_Vol_3M = lambda df_: _parse_abbreviated(df_.Avg_Vol_3M, unit="M"),
    )
    # The new column names record the unit each figure was converted to.
    .rename(columns={
        "Price" : "Price_(USD)",
        "Volume" : "Volume_(in_Millions)",
        "Market_Cap" : "Market_Cap_(in_Billions)",
        "Avg_Vol_3M": "Avg_Vol_3M_(in_Millions)"
    })
)

stocks_df

Unnamed: 0,Name,Symbol,Price_(USD),Change,Volume_(in_Millions),Avg_Vol_3M_(in_Millions),Market_Cap_(in_Billions),PE_Ratio_TTM
0,NVIDIA Corporation,NVDA,111.01,4.58,248.902,313.930,2709.000,37.76
1,"Tesla, Inc.",TSLA,284.95,25.44,166.129,114.692,917.813,163.76
2,Intel Corporation,INTC,20.05,-1.44,147.014,113.823,93.709,
3,Palantir Technologies Inc.,PLTR,112.78,5.00,102.567,110.295,253.637,593.58
4,Ford Motor Company,F,10.04,-0.02,84.940,125.052,40.004,6.88
...,...,...,...,...,...,...,...,...
244,Hudbay Minerals Inc.,HBM,7.45,-0.11,5.082,7.841,2.975,37.25
245,Rithm Capital Corp.,RITM,10.54,0.14,5.069,4.841,5.590,6.31
246,United States Steel Corporation,X,42.68,0.42,5.064,6.801,9.663,27.18
247,BP p.l.c.,BP,29.19,0.19,5.316,12.919,76.064,208.50


In [6]:
# Save the cleaned table as an Excel workbook (relative path, so it lands in
# the current working directory); index=False leaves the row index out.
stocks_df.to_excel("Yahoo-Stocks-Data.xlsx", index=False)