In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def scrape_table(url, timeout=30):
    """Download the page at ``url`` and collect its HTML table rows in a DataFrame.

    Column names are the text of every ``<th>`` found inside a ``<thead>``
    element on the page. Every ``<tr>`` inside a ``<tbody>`` contributes one
    row, provided it has exactly one ``<td>`` per header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A pandas DataFrame with one column per header. All values are the raw
        cell text (strings); no numeric conversion is performed here.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page into
    # an empty frame that only breaks later with a confusing KeyError.
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    headers = [th.text for thead in soup.find_all('thead') for th in thead.find_all('th')]
    raw_data = {header: [] for header in headers}

    for tbody in soup.find_all('tbody'):
        for row in tbody.find_all('tr'):
            cells = row.find_all('td')
            # Compare the number of <td> cells, not len(row): the length of a
            # tag counts every child node, including whitespace text between
            # cells, so it does not reliably equal the number of columns.
            if len(cells) != len(headers):
                continue
            for header, cell in zip(headers, cells):
                raw_data[header].append(cell.text)

    return pd.DataFrame(raw_data)

In [4]:
# Scrape each Yahoo Finance listing page into its own DataFrame.
# NOTE(review): `cryptocurrencies` and `activestocks` are scraped again in
# later cells before being cleaned, so these two requests are redundant.
cryptocurrencies = scrape_table("https://finance.yahoo.com/cryptocurrencies")
currencies = scrape_table("https://finance.yahoo.com/currencies")
# NOTE(review): `commondaties` looks like a misspelling of "commodities";
# the name is kept because other cells may refer to it.
commondaties = scrape_table("https://finance.yahoo.com/commodities")
activestocks = scrape_table("https://finance.yahoo.com/most-active")
techstocks = scrape_table("https://finance.yahoo.com/industries/software_services")
gainers = scrape_table("https://finance.yahoo.com/gainers")
losers = scrape_table("https://finance.yahoo.com/losers")
indices = scrape_table("https://finance.yahoo.com/world-indices")

In [5]:
from datetime import datetime
def convert_column_to_float(df, columns):
    """Convert formatted numeric text columns of ``df`` to a numeric dtype.

    Thousands separators (``,``) and percent signs (``%``) are removed before
    the values are handed to ``pd.to_numeric``. Columns that already have a
    numeric dtype are left untouched by the text cleanup, so calling this
    function again on an already converted frame is safe.

    Args:
        df: DataFrame to convert. It is modified in place.
        columns: Iterable of column names, or a single column name.

    Returns:
        The same ``df`` object, to allow ``df = convert_column_to_float(...)``.

    Raises:
        ValueError: If a value is still not parseable as a number after the
            cleanup (raised by ``pd.to_numeric``).
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = [columns]

    for column in columns:
        values = df[column]
        # The .str accessor raises on numeric columns, which used to make a
        # second call on the same frame fail.
        if not pd.api.types.is_numeric_dtype(values):
            values = (
                values.str.replace(',', '', regex=False)
                      .str.replace('%', '', regex=False)
            )
        df[column] = pd.to_numeric(values)
    return df

def convert_column_to_datetime(df, columns):
    """Parse the named columns of ``df`` with ``pd.to_datetime``.

    Args:
        df: DataFrame to convert. It is modified in place.
        columns: Iterable of column names to parse.

    Returns:
        The same ``df`` object that was passed in.
    """
    for name in columns:
        parsed = pd.to_datetime(df[name])
        df[name] = parsed
    return df

def revert_scaled_number(number):
    """Expand an abbreviated figure such as ``'24.604B'`` into a plain float.

    Recognised suffixes (case-insensitive) are ``K`` (thousand), ``M``
    (million), ``B`` (billion) and ``T`` (trillion). Thousands separators
    (``,``) and surrounding whitespace are ignored.

    Args:
        number: The text to convert. Values that are not strings (for
            example a float from an already converted column) are passed
            straight to ``float``.

    Returns:
        The value as a float, or NaN when the text is empty or one of the
        missing-value placeholders ``'N/A'``, ``'-'`` or ``'--'``.

    Raises:
        ValueError: If the text is not a number after removing the suffix.
    """
    mapping = {'K': 1000, 'M': 1000000, 'B': 1000000000, 'T': 1000000000000}

    # Already numeric: nothing to strip, so re-applying to a converted
    # column does not fail.
    if not isinstance(number, str):
        return float(number)

    text = number.strip().replace(',', '')
    # An empty cell used to raise IndexError on the [-1] lookup below.
    if text in ('', 'N/A', '-', '--'):
        return float('nan')

    scale = text[-1].upper()
    if scale in mapping:
        return float(text[:-1]) * mapping[scale]
    return float(text)

In [6]:
# Re-scrape the cryptocurrency listing so this cell starts from the raw strings.
cryptocurrencies = scrape_table("https://finance.yahoo.com/cryptocurrencies")
# Expand suffixed figures (e.g. a trailing 'B' or 'M') into plain floats.
cryptocurrencies['Market Cap'] = cryptocurrencies['Market Cap'].apply(revert_scaled_number)
cryptocurrencies['Circulating Supply'] = cryptocurrencies['Circulating Supply'].apply(revert_scaled_number)
# Strip ',' and '%' from the price column and convert it to a numeric dtype.
cryptocurrencies = convert_column_to_float(cryptocurrencies, ['Price (Intraday)'])
# Display the frame; every other column is still the scraped text.
cryptocurrencies

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Market Cap,Volume in Currency (Since 0:00 UTC),Volume in Currency (24Hr),Total Volume All Currencies (24Hr),Circulating Supply,52 Week Range,1 Day Chart
0,BTC-USD,Bitcoin USD,37043.27,-1616.9,-4.18%,693695000000.0,42.156B,42.156B,42.156B,18727000.0,,
1,ETH-USD,Ethereum USD,2688.29,-114.97,-4.10%,312258000000.0,33.72B,33.72B,33.72B,116155000.0,,
2,USDT-USD,Tether USD,1.0013,0.0007,+0.07%,62020000000.0,92.796B,92.796B,92.796B,61938000000.0,,
3,BNB-USD,BinanceCoin USD,393.85,-21.57,-5.19%,60430000000.0,5B,5B,5B,153433000.0,,
4,ADA-USD,Cardano USD,1.728,-0.097,-5.31%,55205000000.0,4.54B,4.54B,4.54B,31948000000.0,,
5,DOGE-USD,Dogecoin USD,0.384399,-0.005665,-1.45%,49926000000.0,6.126B,6.126B,6.126B,129882000000.0,,
6,XRP-USD,XRP USD,0.969835,-0.062999,-6.10%,44759000000.0,4.574B,4.574B,4.574B,46151000000.0,,
7,DOT1-USD,Polkadot USD,25.19,-1.76,-6.52%,23800000000.0,2.889B,2.889B,2.889B,944866000.0,,
8,USDC-USD,USDCoin USD,1.0007,0.0001,+0.01%,22838000000.0,2.606B,2.606B,2.606B,22822000000.0,,
9,UNI3-USD,Uniswap USD,26.37,-1.83,-6.48%,14900000000.0,577.645M,577.645M,577.645M,564955000.0,,


In [7]:
# Scrape the "most active" stocks table with the scraper function.
activestocks = scrape_table("https://finance.yahoo.com/most-active")
# Strip ',' and '%' from '% Change' and convert it to a numeric dtype so it
# can be compared against zero below.
activestocks = convert_column_to_float(activestocks, ['% Change'])

# Split by the sign of '% Change'. Rows with a change of exactly 0 end up in
# neither frame.
rising = activestocks[activestocks['% Change'] > 0]
losing = activestocks[activestocks['% Change'] < 0]

In [10]:
# Display one of the frames; uncomment a different line to inspect another.
#activestocks
#rising
losing

Unnamed: 0,Symbol,Name,Price (Intraday),Change,% Change,Volume,Avg Vol (3 month),Market Cap,PE Ratio (TTM),52 Week Range
0,AMC,"AMC Entertainment Holdings, Inc.",47.93,-3.41,-6.64,308.301M,131.546M,24.604B,,
1,BB,BlackBerry Limited,13.84,-2.04,-12.85,163.488M,28.636M,7.836B,,
2,F,Ford Motor Company,15.94,-0.05,-0.28,138.025M,78.904M,63.647B,16.04,
4,GE,General Electric Company,13.98,-0.11,-0.82,49.188M,74.455M,122.681B,,
10,TLRY,"Tilray, Inc.",18.74,-0.98,-4.97,33.038M,24.431M,8.361B,,
14,TAL,TAL Education Group,30.01,-3.26,-9.8,26.838M,7.788M,19.353B,,
15,AAL,American Airlines Group Inc.,24.34,-0.59,-2.37,25.932M,39.118M,15.611B,,
21,PSTH,"Pershing Square Tontine Holdings, Ltd.",21.94,-3.1,-12.4,20.939M,2.66M,4.389B,,
24,GM,General Motors Company,63.22,-0.24,-0.39,18.637M,18.343M,91.704B,10.2,


In [11]:
# Largest gains first.
rising = rising.sort_values(by=['% Change'], ascending=False)
# Largest losses (most negative '% Change') first.
losing = losing.sort_values(by=['% Change'], ascending=True)