In [1]:
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
class StocksScraper:
    """Collect rows from a paginated stocks table with Selenium and export them.

    Typical flow: ``access_url`` -> ``access_most_active_stocks`` ->
    ``extract_stocks_data`` -> ``clean_and_save_the_data``.

    Attributes:
        driver: The Selenium WebDriver passed to the constructor.
        wait: A ``WebDriverWait`` bound to ``driver`` with the given timeout.
        data: List of dicts, one per scraped table row, holding raw cell text.
    """

    # Absolute XPaths used for navigation. They mirror the page layout at the
    # time of recording and will break if the site markup changes.
    _MARKETS_MENU_XPATH = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/a[1]/span[1]'
    _TRENDING_TICKERS_XPATH = '/html[1]/body[1]/div[2]/header[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/ul[1]/li[3]/div[1]/ul[1]/li[4]/a[1]/div[1]'
    _MOST_ACTIVE_XPATH = '/html[1]/body[1]/div[2]/main[1]/section[1]/section[1]/section[1]/article[1]/section[1]/div[1]/nav[1]/ul[1]/li[1]/a[1]/span[1]'
    _NEXT_BUTTON_XPATH = '//*[@id="nimbus-app"]/section/section/section/article/section[1]/div/div[3]/div[3]/button[3]'

    # Output column -> index of the <td> cell it is read from. Insertion order
    # is the column order of the resulting DataFrame.
    _CELL_INDEX = {
        "Name": 1,
        "Symbol": 0,
        "Price": 3,
        "Change": 4,
        "Volume": 6,
        "Avg_Vol_3M": 7,
        "Market_Cap": 8,
        "PE_Ratio_TTM": 9,
    }

    # Magnitude suffixes recognised when parsing abbreviated numbers.
    _UNIT_MULTIPLIERS = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12}

    # Cell texts that mean "no value".
    _MISSING_MARKERS = ("", "-", "--", "N/A")

    def __init__(self, driver, timeout=10):
        """Store the driver and build an explicit wait around it.

        Args:
            driver: A Selenium WebDriver instance.
            timeout: Seconds each explicit wait may block before timing out.
        """
        self.driver = driver
        self.wait = WebDriverWait(self.driver, timeout=timeout)
        self.data = []

    def wait_for_page_to_load(self):
        """Block until ``document.readyState`` is "complete" and report the result.

        A timeout is reported with a message rather than raised, so callers can
        carry on with whatever has loaded.
        """
        try:
            self.wait.until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            # Read the title only now: before the wait it may still be the
            # title of the page we are navigating away from.
            print(f'The page "{self.driver.title}" did not get fully loaded within the given duration.\n')
        else:
            print(f'The page "{self.driver.title}" is successfully loaded.\n')

    def access_url(self, url):
        """Open ``url`` in the browser and wait for the document to load."""
        self.driver.get(url)
        self.wait_for_page_to_load()

    def access_most_active_stocks(self):
        """Navigate Markets -> Trending Tickers -> Most Active via the page menus.

        Raises:
            TimeoutException: If any of the menu elements does not become
                present/clickable within the configured timeout.
        """
        # Hover over the Markets menu so that its dropdown is rendered.
        markets_menu = self.wait.until(
            EC.presence_of_element_located((By.XPATH, self._MARKETS_MENU_XPATH))
        )
        ActionChains(self.driver).move_to_element(markets_menu).perform()

        # Open the Trending Tickers entry of the dropdown.
        trending_tickers = self.wait.until(
            EC.element_to_be_clickable((By.XPATH, self._TRENDING_TICKERS_XPATH))
        )
        trending_tickers.click()
        self.wait_for_page_to_load()

        # Switch to the Most Active tab.
        most_active = self.wait.until(
            EC.element_to_be_clickable((By.XPATH, self._MOST_ACTIVE_XPATH))
        )
        most_active.click()
        self.wait_for_page_to_load()

    def extract_stocks_data(self):
        """Append every table row of every result page to ``self.data``.

        Pages are walked by clicking the Next button until it stops being
        clickable within the wait timeout. Rows that have fewer cells than the
        highest index in ``_CELL_INDEX`` requires are skipped.

        Raises:
            TimeoutException: If no ``<table>`` appears on a page in time.
        """
        required_cells = max(self._CELL_INDEX.values()) + 1

        while True:
            self.wait.until(
                EC.presence_of_all_elements_located((By.TAG_NAME, "table"))
            )
            rows = self.driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

            for row in rows:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < required_cells:
                    # Not a full data row; indexing it would raise IndexError.
                    continue
                self.data.append(
                    {column: cells[index].text for column, index in self._CELL_INDEX.items()}
                )

            try:
                next_button = self.wait.until(
                    EC.element_to_be_clickable((By.XPATH, self._NEXT_BUTTON_XPATH))
                )
            except TimeoutException:
                # Only a timeout means "no further page"; any other error
                # should surface instead of being reported as completion.
                print("The Next Button is not clickable. We have navigated through all the pages.\n")
                break

            next_button.click()
            # Fixed pause to give the table time to refresh before re-reading it.
            time.sleep(1)

    @classmethod
    def _to_number(cls, text, unit=None):
        """Parse a scraped numeric string into a float.

        Thousands separators and a leading "+" are ignored, and a trailing
        K/M/B/T suffix is honoured.

        Args:
            text: Raw cell text, e.g. "1,234.5", "+0.42", "12.3M", "--".
            unit: None to return the absolute value, or one of "K", "M", "B",
                "T" to express the result in that unit.

        Returns:
            The parsed float, or NaN when the text is a missing-value marker
            or cannot be parsed.
        """
        if text is None:
            return np.nan

        cleaned = (
            str(text)
            .strip()
            .replace(",", "")
            .replace("+", "")
            .replace("\u2212", "-")  # typographic minus sign
        )
        if cleaned in cls._MISSING_MARKERS:
            return np.nan

        suffix = cleaned[-1].upper()
        if suffix in cls._UNIT_MULTIPLIERS:
            digits, magnitude = cleaned[:-1], cls._UNIT_MULTIPLIERS[suffix]
        else:
            digits, magnitude = cleaned, 1.0

        try:
            number = float(digits)
        except ValueError:
            return np.nan

        target = 1.0 if unit is None else cls._UNIT_MULTIPLIERS[unit]
        # When suffix and target unit agree the ratio is exactly 1.0, so the
        # displayed figure is returned unchanged.
        return number * (magnitude / target)

    def clean_data(self):
        """Build a typed DataFrame from the raw rows in ``self.data``.

        Text columns are stripped; Price, Change and PE ratio become floats;
        volumes are expressed in millions and market cap in billions.
        Unparseable or missing values become NaN. An empty ``self.data``
        yields an empty frame with the full set of columns.

        Returns:
            pandas.DataFrame with the renamed, cleaned columns.
        """
        raw = pd.DataFrame(self.data, columns=list(self._CELL_INDEX))

        def strip_text(value):
            return value.strip() if isinstance(value, str) else value

        def numeric(column, unit=None):
            return raw[column].map(lambda value: self._to_number(value, unit)).astype(float)

        cleaned = raw.assign(
            Name=raw["Name"].map(strip_text),
            Symbol=raw["Symbol"].map(strip_text),
            Price=numeric("Price"),
            Change=numeric("Change"),
            Volume=numeric("Volume", "M"),
            Avg_Vol_3M=numeric("Avg_Vol_3M", "M"),
            Market_Cap=numeric("Market_Cap", "B"),
            PE_Ratio_TTM=numeric("PE_Ratio_TTM"),
        )

        return cleaned.rename(columns={
            "Price": "Price_(USD)",
            "Volume": "Volume_(in_Millions)",
            "Market_Cap": "Market_Cap_(in_Billions)",
            "Avg_Vol_3M": "Avg_Vol_3M_(in_Millions)",
        })

    def clean_and_save_the_data(self, filename="temp"):
        """Clean the scraped rows and write them to ``<filename>.xlsx``.

        Args:
            filename: Output path without the ".xlsx" extension.
        """
        stocks_df = self.clean_data()
        stocks_df.to_excel(f"{filename}.xlsx", index=False)

In [3]:
if __name__ == "__main__":
    # End-to-end run: open the site, navigate to the Most Active table,
    # scrape every page and export the cleaned result to Excel.
    url = "https://finance.yahoo.com/"

    driver = webdriver.Chrome()
    try:
        driver.maximize_window()

        scraper = StocksScraper(driver, 5)

        scraper.access_url(url)
        scraper.access_most_active_stocks()
        scraper.extract_stocks_data()
        scraper.clean_and_save_the_data("Yahoo-Finance-Stocks")
    finally:
        # Always close the browser, even when a scraping step raises, so no
        # orphaned Chrome/chromedriver process is left behind.
        driver.quit()

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Yahoo Finance - Stock Market Live, Quotes, Business & Finance News" is successfully loaded.

The page "Top Trending Stocks: US stocks with the highest interest today - Yahoo Finance" is successfully loaded.

The Next Button is not clickable. We have navigated through all the pages.

