In [1]:
import time
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains


service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

driver.maximize_window()

wait = WebDriverWait(driver, 5)

def wait_for_page_to_load(driver, wait):

    page_title = driver.title
    
    try:
        
        wait.until(
            lambda driver : driver.execute_script("return document.readyState") == 'complete'
        )
    
    except Exception as e:
        print(f"page {page_title} was not loaded within time with error msg : {e}")
    
    else:
        print(f"page {page_title} was loaded within time")


url = "https://finance.yahoo.com/"
driver.get(url)

wait_for_page_to_load(driver, wait)

actions = ActionChains(driver)

market_menu = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="navigation-container"]/ol/li[3]/a/div'))
)

actions.move_to_element(market_menu).perform()


stocks_menu = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="navigation-container"]/ol/li[3]/ol/li[1]/a/span'))
)

actions.move_to_element(stocks_menu).perform()

trending_menu = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="navigation-container"]/ol/li[3]/ol/li[1]/ol/li[4]/a/span'))
)

trending_menu.click()

wait_for_page_to_load(driver, wait)

most_active = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="tab-most-active"]'))
)

most_active.click()

wait_for_page_to_load(driver, wait)

data = []


while True:

    wait.until(
        EC.presence_of_element_located((By.TAG_NAME, 'table'))
    )

    rows = driver.find_elements(By.CSS_SELECTOR, 'table tbody tr')
    for row in rows:
        values = row.find_elements(By.TAG_NAME, 'td')
        print([val.text for val in values])
        stock = {
            "name": values[1].text,
            "symbol": values[0].text,
            "price": values[3].text,
            "change": values[4].text,
            "volumn": values[6].text,
            "market_cap": values[8].text,
            "pe_ratio": values[9].text
        }
        data.append(stock)
    
    try:
        
        next_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="main-content-wrapper"]/section[1]/div/div[4]/div[3]/button[3]'))
        )

    except:
        print("the \"next\" button is not clickable as we have gone through all pages.")
        break
    else:
        next_button.click()
        time.sleep(1)
    



driver.quit()

page Yahoo Finance - Stock Market Live, Quotes, Business & Finance News was loaded within time
page Yahoo Finance - Stock Market Live, Quotes, Business & Finance News was loaded within time
page Top Trending Stocks: US stocks with the highest interest today - Yahoo Finance was loaded within time
['BABA', 'Alibaba Group Holding Limited', '', '153.80', '-6.04', '-3.78%', '33.728M', '18.989M', '382.864B', '17.76', '+80.43%', '80.06\n192.67']
['CDTX', 'Cidara Therapeutics, Inc.', '', '217.71', '+111.72', '+105.41%', '17.287M', '866,718', '6.845B', '--', '+637.58%', '13.90\n218.85']
['NFLX', 'Netflix, Inc.', '', '1,112.17', '-42.06', '-3.64%', '4.706M', '3.558M', '471.262B', '46.55', '+40.08%', '809.33\n1,341.15']
['STUB', 'StubHub Holdings, Inc.', '', '14.87', '-3.95', '-20.99%', '17.093M', '5.075M', '5.469B', '--', '-14.45%', '12.91\n27.89']
['DVLT', 'Datavault AI Inc.', '', '1.8300', '+0.2900', '+18.83%', '47.212M', '68.626M', '521.031M', '0.36', '-25.60%', '0.25\n4.10']
['CYPH', 'Cypher

In [2]:
len(data)

307

In [3]:
stocks_df = pd.DataFrame(data)
stocks_df

Unnamed: 0,name,symbol,price,change,volumn,market_cap,pe_ratio
0,Alibaba Group Holding Limited,BABA,153.80,-6.04,33.728M,382.864B,17.76
1,"Cidara Therapeutics, Inc.",CDTX,217.71,+111.72,17.287M,6.845B,--
2,"Netflix, Inc.",NFLX,1112.17,-42.06,4.706M,471.262B,46.55
3,"StubHub Holdings, Inc.",STUB,14.87,-3.95,17.093M,5.469B,--
4,Datavault AI Inc.,DVLT,1.8300,+0.2900,47.212M,521.031M,0.36
...,...,...,...,...,...,...,...
302,"Figure Technology Solutions, Inc.",FIGR,40.24,+5.65,5.101M,8.611B,28.94
303,The Mosaic Company,MOS,26.05,+0.50,5.097M,8.268B,6.65
304,Gen Digital Inc.,GEN,26.70,-0.02,5.084M,16.466B,29.82
305,"Chime Financial, Inc.",CHYM,18.90,-0.29,5.065M,7.08B,--


In [4]:
stocks_df.info

<bound method DataFrame.info of                                   name symbol     price   change   volumn  \
0        Alibaba Group Holding Limited   BABA    153.80    -6.04  33.728M   
1            Cidara Therapeutics, Inc.   CDTX    217.71  +111.72  17.287M   
2                        Netflix, Inc.   NFLX  1,112.17   -42.06   4.706M   
3               StubHub Holdings, Inc.   STUB     14.87    -3.95  17.093M   
4                    Datavault AI Inc.   DVLT    1.8300  +0.2900  47.212M   
..                                 ...    ...       ...      ...      ...   
302  Figure Technology Solutions, Inc.   FIGR     40.24    +5.65   5.101M   
303                 The Mosaic Company    MOS     26.05    +0.50   5.097M   
304                   Gen Digital Inc.    GEN     26.70    -0.02   5.084M   
305              Chime Financial, Inc.   CHYM     18.90    -0.29   5.065M   
306                          Coty Inc.   COTY      3.50    -0.13   5.023M   

    market_cap pe_ratio  
0     382.864B   

In [17]:
stocks_df.describe

<bound method NDFrame.describe of                                       name symbol   price  change    volumn  \
0                              Tesla, Inc.   TSLA  445.91  -16.35  104.867M   
1                                Snap Inc.   SNAP    8.01   +0.71  155.951M   
2                              Pfizer Inc.    PFE   24.85   +0.24  111.564M   
3                              Tesla, Inc.   TSLA  445.88  -16.38   96.048M   
4               Opendoor Technologies Inc.   OPEN    6.56   -0.67   89.793M   
..                                     ...    ...     ...     ...       ...   
352  Vodafone Group Public Limited Company    VOD   11.34   +0.07    5.046M   
353                       Fastenal Company   FAST   40.77   -0.91    5.043M   
354            DigitalOcean Holdings, Inc.   DOCN   47.08   +1.27    5.043M   
355                   Fluence Energy, Inc.   FLNC   19.82   -0.90    5.019M   
356            International Paper Company     IP   36.54   +0.22     5.01M   

    market_cap pe

In [89]:
stocks_df.apply(lambda col : col.str.strip() if col.dtype == 'object' else col)

Unnamed: 0,name,symbol,price,change,volumn,market_cap,pe_ratio
0,"Tesla, Inc.",TSLA,445.91,-16.35,104.867M,1.483T,303.34
1,Snap Inc.,SNAP,8.01,+0.71,155.951M,13.536B,--
2,Pfizer Inc.,PFE,24.85,+0.24,111.564M,195.108B,14.12
3,"Tesla, Inc.",TSLA,445.88,-16.38,96.048M,1.483T,306.57
4,Opendoor Technologies Inc.,OPEN,6.56,-0.67,89.793M,4.868B,--
...,...,...,...,...,...,...,...
352,Vodafone Group Public Limited Company,VOD,11.34,+0.07,5.046M,27.731B,--
353,Fastenal Company,FAST,40.77,-0.91,5.043M,46.805B,38.63
354,"DigitalOcean Holdings, Inc.",DOCN,47.08,+1.27,5.043M,4.306B,14.70
355,"Fluence Energy, Inc.",FLNC,19.82,-0.90,5.019M,3.617B,--


In [90]:
stocks_df.dtypes

name          object
symbol        object
price         object
change        object
volumn        object
market_cap    object
pe_ratio      object
dtype: object

In [91]:
stocks_df['price']= stocks_df['price'].astype(float)

In [92]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change         object
volumn         object
market_cap     object
pe_ratio       object
dtype: object

In [93]:
import re

In [96]:
stocks_df['change'].str.extract(r'([^0-9.])')

Unnamed: 0,0
0,-
1,+
2,+
3,-
4,-
...,...
352,+
353,-
354,+
355,-


In [97]:
stocks_df['change'] = stocks_df['change'].str.replace('+','')

In [98]:
stocks_df

Unnamed: 0,name,symbol,price,change,volumn,market_cap,pe_ratio
0,"Tesla, Inc.",TSLA,445.91,-16.35,104.867M,1.483T,303.34
1,Snap Inc.,SNAP,8.01,0.71,155.951M,13.536B,--
2,Pfizer Inc.,PFE,24.85,0.24,111.564M,195.108B,14.12
3,"Tesla, Inc.",TSLA,445.88,-16.38,96.048M,1.483T,306.57
4,Opendoor Technologies Inc.,OPEN,6.56,-0.67,89.793M,4.868B,--
...,...,...,...,...,...,...,...
352,Vodafone Group Public Limited Company,VOD,11.34,0.07,5.046M,27.731B,--
353,Fastenal Company,FAST,40.77,-0.91,5.043M,46.805B,38.63
354,"DigitalOcean Holdings, Inc.",DOCN,47.08,1.27,5.043M,4.306B,14.70
355,"Fluence Energy, Inc.",FLNC,19.82,-0.90,5.019M,3.617B,--


In [99]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change         object
volumn         object
market_cap     object
pe_ratio       object
dtype: object

In [100]:
stocks_df['change'] = stocks_df['change'].astype(float)

In [101]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change        float64
volumn         object
market_cap     object
pe_ratio       object
dtype: object

In [102]:
stocks_df['volumn'].unique()

array(['104.867M', '155.951M', '111.564M', '96.048M', '89.793M',
       '87.095M', '85.445M', '84.826M', '83.488M', '75.885M', '72.394M',
       '67.478M', '64.594M', '64.527M', '63.903M', '63.825M', '61.748M',
       '59.97M', '52.362M', '50.624M', '49.712M', '48.187M', '47.826M',
       '47.088M', '46.26M', '44.686M', '44.58M', '44.496M', '43.782M',
       '41.993M', '41.113M', '39.93M', '39.192M', '37.737M', '37.669M',
       '37.642M', '37.013M', '36.79M', '36.497M', '36.46M', '36.373M',
       '35.987M', '35.346M', '34.95M', '34.502M', '33.947M', '33.805M',
       '32.626M', '32.397M', '32.179M', '31.884M', '31.851M', '31.593M',
       '30.912M', '29.652M', '29.269M', '29.105M', '28.747M', '28.712M',
       '28.078M', '27.645M', '27.178M', '26.499M', '26.124M', '25.628M',
       '24.605M', '24.527M', '24.389M', '24.322M', '23.19M', '22.796M',
       '22.567M', '22.524M', '22.472M', '22.175M', '22.15M', '22.11M',
       '21.962M', '21.533M', '21.109M', '20.866M', '20.814M', '20.682

In [103]:
stocks_df['volumn'] = stocks_df['volumn'].apply(lambda x : x.replace('M',''))

In [104]:
stocks_df['volumn']

0      104.867
1      155.951
2      111.564
3       96.048
4       89.793
        ...   
352      5.046
353      5.043
354      5.043
355      5.019
356       5.01
Name: volumn, Length: 357, dtype: object

In [105]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change        float64
volumn         object
market_cap     object
pe_ratio       object
dtype: object

In [106]:
stocks_df['volumn'] = stocks_df['volumn'].astype(float)

In [62]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change        float64
volumn        float64
market_cap     object
pe_ratio       object
dtype: object

In [107]:
stocks_df['market_cap']= stocks_df['market_cap'].apply(
    lambda x : float(x.replace('B','')) if 'B' in x else float(x.replace('T',''))*1000 
)

In [108]:
stocks_df['market_cap'].str.extract(r'([^0-9.])', expand=False).unique()

AttributeError: Can only use .str accessor with string values!

In [109]:
stocks_df['market_cap']

0      1483.000
1        13.536
2       195.108
3      1483.000
4         4.868
         ...   
352      27.731
353      46.805
354       4.306
355       3.617
356      19.292
Name: market_cap, Length: 357, dtype: float64

In [110]:
stocks_df.dtypes

name           object
symbol         object
price         float64
change        float64
volumn        float64
market_cap    float64
pe_ratio       object
dtype: object

In [111]:
stocks_df

Unnamed: 0,name,symbol,price,change,volumn,market_cap,pe_ratio
0,"Tesla, Inc.",TSLA,445.91,-16.35,104.867,1483.000,303.34
1,Snap Inc.,SNAP,8.01,0.71,155.951,13.536,--
2,Pfizer Inc.,PFE,24.85,0.24,111.564,195.108,14.12
3,"Tesla, Inc.",TSLA,445.88,-16.38,96.048,1483.000,306.57
4,Opendoor Technologies Inc.,OPEN,6.56,-0.67,89.793,4.868,--
...,...,...,...,...,...,...,...
352,Vodafone Group Public Limited Company,VOD,11.34,0.07,5.046,27.731,--
353,Fastenal Company,FAST,40.77,-0.91,5.043,46.805,38.63
354,"DigitalOcean Holdings, Inc.",DOCN,47.08,1.27,5.043,4.306,14.70
355,"Fluence Energy, Inc.",FLNC,19.82,-0.90,5.019,3.617,--


In [131]:
stocks_df['pe_ratio'].str.extract(r'([^0-9.])', expand=False).unique()

array([nan], dtype=object)

In [130]:
stocks_df['pe_ratio'] = stocks_df['pe_ratio'].str.replace(',','')

In [132]:
stocks_df['pe_ratio'].unique()

array(['303.34', '', '14.12', '306.57', '54.18', '10.93', '441.80',
       '820.32', '7.94', '15.53', '130.93', '21.36', '287.95', '170.85',
       '14.63', '13.51', '79.70', '7.82', '34.93', '36.20', '37.79',
       '4.56', '9.74', '9.14', '27.38', '11.43', '37.71', '20.99',
       '12.90', '28.73', '11.99', '28.11', '33.65', '8.38', '36.58',
       '10.64', '5.58', '13.45', '12.58', '20.02', '47.92', '27.57',
       '27.72', '10.68', '13.60', '18.73', '14.46', '16.53', '27.36',
       '9.95', '26.95', '10.62', '56.94', '386.72', '57.46', '27.59',
       '130.00', '3.90', '12.14', '73.16', '14.05', '13.47', '201.97',
       '154.61', '31.43', '5.73', '17.44', '90.41', '25.75', '12.88',
       '22.85', '16.32', '15.88', '8.60', '18.97', '22.74', '9.03',
       '28.36', '58.39', '14.35', '38.60', '84.47', '11.93', '24.83',
       '26.76', '11.10', '16.57', '6.87', '21.17', '19.91', '36.98',
       '13.27', '15.78', '21.01', '17.50', '41.67', '8.87', '1957.62',
       '178.37', '15.35', 

In [126]:
stocks_df['pe_ratio'] = stocks_df['pe_ratio'].replace(',','')

In [133]:
stocks_df['pe_ratio'] = pd.to_numeric(stocks_df['pe_ratio'])

In [140]:
stocks_df.dtypes

name             object
symbol           object
price_usd       float64
change          float64
volumn          float64
market_cap_B    float64
pe_ratio        float64
dtype: object

In [137]:
stocks_df.rename(
   columns =  {
       "price": "price_usd",
		"volume": "volume_M",
		"market_cap": "market_cap_B"
       }, inplace=True
)

In [138]:
stocks_df

Unnamed: 0,name,symbol,price_usd,change,volumn,market_cap_B,pe_ratio
0,"Tesla, Inc.",TSLA,445.91,-16.35,104.867,1483.000,303.34
1,Snap Inc.,SNAP,8.01,0.71,155.951,13.536,
2,Pfizer Inc.,PFE,24.85,0.24,111.564,195.108,14.12
3,"Tesla, Inc.",TSLA,445.88,-16.38,96.048,1483.000,306.57
4,Opendoor Technologies Inc.,OPEN,6.56,-0.67,89.793,4.868,
...,...,...,...,...,...,...,...
352,Vodafone Group Public Limited Company,VOD,11.34,0.07,5.046,27.731,
353,Fastenal Company,FAST,40.77,-0.91,5.043,46.805,38.63
354,"DigitalOcean Holdings, Inc.",DOCN,47.08,1.27,5.043,4.306,14.70
355,"Fluence Energy, Inc.",FLNC,19.82,-0.90,5.019,3.617,


In [139]:
stocks_df.isnull().sum()

name              0
symbol            0
price_usd         0
change            0
volumn            0
market_cap_B      0
pe_ratio        155
dtype: int64

In [142]:
stocks_df.to_excel("yahoo-stocks-data.xlsx", index=False)