In [33]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [30]:
# Launch a Chrome session and maximise its window
driver = webdriver.Chrome()
driver.maximize_window()


# Explicit wait: upper bound, in seconds, handed to WebDriverWait
explicit_wait_seconds = 5
wait = WebDriverWait(driver, timeout=explicit_wait_seconds)


# Check whether the current page has finished loading.
def wait_for_page_to_load(driver, wait):
    """Wait for ``document.readyState`` to become ``"complete"`` and report it.

    Parameters
    ----------
    driver
        Object exposing a ``title`` attribute; it is read once the wait has
        finished so that the message names the page that was waited for.
    wait
        Object exposing ``until(condition)``; the condition is called with a
        driver-like argument and runs the ``document.readyState`` script on it.

    Returns
    -------
    bool
        ``True`` when the wait completed, ``False`` when it raised.

    Notes
    -----
    Only ``Exception`` subclasses are treated as "not loaded", so
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
    NOTE(review): right after a click the previous document may still report
    ``"complete"``, in which case this returns immediately for the old page --
    confirm whether callers need a stronger navigation check.
    """
    try:
        wait.until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
    except Exception:
        loaded = False
    else:
        loaded = True

    # Read the title only now: reading it before the wait reports the title of
    # whatever page was displayed when the function was entered.
    page_title = driver.title

    if loaded:
        print(f"The page \"{page_title}\" is successfully loaded.\n")
    else:
        print(f"The page \"{page_title}\" did not get fully loaded within the given duration.\n")

    return loaded


# Open the Yahoo Finance home page
url = "https://finance.yahoo.com/"
driver.get(url)
wait_for_page_to_load(driver, wait)


# Absolute XPaths of the three elements used to reach the target table
markets_menu_xpath = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/a[1]/span[1]'
trending_tickers_xpath = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/div[1]/ul[1]/li[4]/a[1]/div[1]'
most_active_xpath = '/html[1]/body[1]/div[2]/main[1]/section[1]/section[1]/section[1]/article[1]/section[1]/div[1]/nav[1]/ul[1]/li[1]/a[1]/span[1]'


# Hover over the Markets menu (presumably to reveal its dropdown -- confirm)
actions = ActionChains(driver)
markets_menu_locator = (By.XPATH, markets_menu_xpath)
markets_menu = wait.until(EC.presence_of_element_located(markets_menu_locator))
actions.move_to_element(markets_menu).perform()


# Click the Trending Tickers entry once it is clickable
trending_tickers_locator = (By.XPATH, trending_tickers_xpath)
trending_tickers = wait.until(EC.element_to_be_clickable(trending_tickers_locator))
trending_tickers.click()
wait_for_page_to_load(driver, wait)


# Click the Most Active entry once it is clickable
most_active_locator = (By.XPATH, most_active_xpath)
most_active = wait.until(EC.element_to_be_clickable(most_active_locator))
most_active.click()
wait_for_page_to_load(driver, wait)


# Scraping The Data
# Walk through every result page, turning each table row into a dict of cell
# texts, and always close the browser afterwards.
data = []

# Column label -> position of the <td> that holds it inside a table row.
# Position 2 is not read (presumably a non-text cell -- TODO confirm).
cell_index_by_column = {
    "Name": 1,
    "Symbol": 0,
    "Price": 3,
    "Change": 4,
    "Change %": 5,
    "Volume": 6,
    "Avg Vol (3M)": 7,
    "Market Cap": 8,
    "P/E Ratio (TTM)": 9,
    "52 Week Change %": 10,
}
min_cells_per_row = max(cell_index_by_column.values()) + 1

next_button_xpath = '//*[@id="nimbus-app"]/section/section/section/article/section[1]/div/div[3]/div[3]/button[3]'

try:
    while True:
        # Scraping
        wait.until(
            EC.presence_of_all_elements_located((By.TAG_NAME, "table"))
        )
        rows = driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

        for row in rows:
            values = row.find_elements(By.TAG_NAME, "td")
            # Rows with too few cells cannot be mapped onto the columns above;
            # skip them instead of failing with an IndexError.
            if len(values) < min_cells_per_row:
                continue
            data.append(
                {column: values[index].text for column, index in cell_index_by_column.items()}
            )

        # Clicking Next
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, next_button_xpath))
            )
        except TimeoutException:
            # Only a timeout means "no further page"; any other error propagates.
            print("The Next Button is not clickable. We have navigated through all the pages.\n")
            break

        next_button.click()
        # NOTE(review): a fixed pause does not guarantee the table has been
        # re-rendered; rows read too early may mix values from two pages.
        time.sleep(1)
finally:
    # Close the browser even when scraping fails part-way through.
    driver.quit()

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Top Trending Stocks: US stocks with the highest interest today - Yahoo Finance" is successfully loaded.

The Next Button is not clickable. We have navigated through all the pages.



In [31]:
# Preview the first few scraped records instead of dumping the whole list
data[:5]

[{'Name': 'International Business Machines Corporation',
  'Symbol': 'IBM',
  'Price': '227.42',
  'Change': '-18.06',
  'Change %': '-7.36%',
  'Volume': '11.355M',
  'Avg Vol (3M)': '5.251M',
  'Market Cap': '210.878B',
  'P/E Ratio (TTM)': '35.37',
  '52 Week Change %': '+45.33%'},
 {'Name': 'PepsiCo, Inc.',
  'Symbol': 'PEP',
  'Price': '135.24',
  'Change': '-7.02',
  'Change %': '-4.93%',
  'Volume': '8.375M',
  'Avg Vol (3M)': '110.514M',
  'Market Cap': '250.065B',
  'P/E Ratio (TTM)': '561.16',
  '52 Week Change %': '+364.39%'},
 {'Name': 'Tesla, Inc.',
  'Symbol': 'TSLA',
  'Price': '255.21',
  'Change': '+4.47',
  'Change %': '+1.78%',
  'Volume': '65.517M',
  'Avg Vol (3M)': '114.156M',
  'Market Cap': '822.023B',
  'P/E Ratio (TTM)': '145.01',
  '52 Week Change %': '+47.34%'},
 {'Name': 'Ford Motor Company',
  'Symbol': 'F',
  'Price': '10.02',
  'Change': '+0.24',
  'Change %': '+2.50%',
  'Volume': '54.382M',
  'Avg Vol (3M)': '126.27M',
  'Market Cap': '39.865B',
  'P/E

In [32]:
# Number of row records collected by the scraping loop
len(data)

164

In [57]:
# Build a DataFrame from the scraped records: trim whitespace in text columns,
# convert Price and Change to numbers, and label the price column with its unit.
stocks_df = (
    pd
    .DataFrame(data)
    .apply(lambda col: col.str.strip() if col.dtype == "object" else col)
    .assign(
        # Drop thousands separators first so values such as "1,234.56" parse.
        Price = lambda df_: pd.to_numeric(
            df_.Price.str.replace(",", "", regex=False)
        ),
        # regex=False keeps "+" a literal character whatever the pandas
        # version's default for `regex` is.
        Change = lambda df_: pd.to_numeric(
            df_.Change
            .str.replace("+", "", regex=False)
            .str.replace(",", "", regex=False)
        )
    )
    .rename(columns={ "Price" : "Price (USD)" })
)

stocks_df

Unnamed: 0,Name,Symbol,Price (USD),Change,Change %,Volume,Avg Vol (3M),Market Cap,P/E Ratio (TTM),52 Week Change %
0,International Business Machines Corporation,IBM,227.420,-18.060,-7.36%,11.355M,5.251M,210.878B,35.37,+45.33%
1,"PepsiCo, Inc.",PEP,135.240,-7.020,-4.93%,8.375M,110.514M,250.065B,561.16,+364.39%
2,"Tesla, Inc.",TSLA,255.210,4.470,+1.78%,65.517M,114.156M,822.023B,145.01,+47.34%
3,Ford Motor Company,F,10.020,0.240,+2.50%,54.382M,126.27M,39.865B,6.87,-25.00%
4,Intel Corporation,INTC,21.410,0.820,+3.96%,53.468M,113.216M,93.338B,--,-41.36%
...,...,...,...,...,...,...,...,...,...,...
159,"ASE Technology Holding Co., Ltd.",ASX,8.610,0.090,+1.12%,5.087M,12.145M,18.71B,19.58,-16.06%
160,NexGen Energy Ltd.,NXE,5.120,0.180,+3.64%,5.052M,10.414M,2.917B,--,-35.93%
161,"Lumen Technologies, Inc.",LUMN,3.355,0.075,+2.29%,5.045M,14.116M,3.439B,--,+171.07%
162,Novo Nordisk A/S,NVO,62.630,1.330,+2.16%,5.046M,8.738M,281.983B,17.95,-51.27%
