In [3]:
# import requested library
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [6]:
# Yahoo Finance cryptocurrency screener page to scrape.
url = "https://finance.yahoo.com/cryptocurrencies/"

# A browser-like User-Agent is sent with the request; the original author
# found it had to be specified in their environment.
headers = {"User-Agent": "Mozilla/5.0"}

# Fetch the page. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() surfaces HTTP errors (4xx/5xx) here rather
# than as a confusing parsing failure in a later cell.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# HTML of the page as text.
source = response.text

# Make a soup of it! Naming the parser explicitly avoids bs4 guessing one
# (which emits a warning and can give different trees on different machines);
# "html.parser" ships with Python, so no extra dependency is needed.
soup = BeautifulSoup(source, "html.parser")

In [8]:
# The data lives in the first <table> of the soup: each <tr> is a row and each
# <td> inside it is a cell. Collect the cell texts row by row into a list of
# lists.

table = soup.table
if table is None:
    # Without this check the next statement fails with an opaque
    # "'NoneType' object has no attribute 'find_all'".
    raise ValueError(
        "No <table> element found in the downloaded page; "
        "the page layout may have changed or the request was blocked."
    )

table_rows = table.find_all('tr')

# A <tr> without any <td> (e.g. a header row) yields an empty list here, which
# becomes an all-missing row once the list is turned into a DataFrame.
table_ls = [[cell.text for cell in tr.find_all('td')] for tr in table_rows]

In [9]:
# Build a DataFrame from the scraped rows (one inner list per table row)

df = pd.DataFrame(data=table_ls)

In [10]:
# Preview the first five rows of the raw table
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,,,,,,,,,,,,
1,BTC-USD,Bitcoin USD,60508.26,250.5,+0.42%,1.142T,40.153B,40.153B,40.153B,18.876M,,
2,ETH-USD,Ethereum USD,4242.35,-15.62,-0.37%,502.214B,23.081B,23.081B,23.081B,118.381M,,
3,BNB-USD,BinanceCoin USD,580.43,-5.16,-0.88%,96.816B,2.865B,2.865B,2.865B,166.801M,,
4,USDT-USD,Tether USD,1.0007,0.0008,+0.08%,73.815B,85.984B,85.984B,85.984B,73.761B,,


In [11]:
# Drop every row that contains a missing value (how="any" is dropna's default).
# In the preview above this removes row 0, which is empty in every column.

df = df.dropna(axis=0, how="any")

In [12]:
# Remove the two trailing columns (labels 10 and 11), which are blank in the
# preview above
df = df.drop(columns=[10, 11])

# Strip the thousands separator "," from every string so the values can later
# be parsed as numbers
df = df.replace(to_replace=",", value="", regex=True)

In [13]:
# Strip the "%" sign from every string so the percentages can later be parsed
# as numbers; the result is stored in a new DataFrame
df2 = df.replace(to_replace="%", value="", regex=True)

In [14]:
# Strip the B(illion), M(illion) and T(rillion) suffixes and express every such
# value in billions: "B" values are kept as they are, "T" values are multiplied
# by 1000 and "M" values are divided by 1000.

def _to_billions(value):
    """Convert a string such as "1.142T", "40.153B" or "18.876M" to a float in billions.

    Any other input (a non-string, a string without one of these suffixes, or
    a suffixed string whose remainder is not a number) is returned unchanged,
    so the conversion stays best-effort.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    suffix = text[-1:]
    if suffix not in ("B", "T", "M"):
        return value
    try:
        number = float(text[:-1])
    except ValueError:
        return value
    if suffix == "T":
        return number * 1000
    if suffix == "M":
        return number / 1000
    return number


# Only the value columns (position 2 onwards, i.e. everything after the symbol
# and name columns) are converted, so a "B", "T" or "M" inside a symbol or name
# is never touched. Percent signs were already stripped when df2 was created,
# so no "%" handling is needed here.
#
# The mapped column is assigned back explicitly instead of mutating the rows of
# `df2.values`: that array is only a writable view of the frame in some pandas
# configurations, and with Copy-on-Write it is read-only, in which case a
# swallowed ValueError would leave df2 silently unconverted.
for column in df2.columns[2:]:
    df2[column] = df2[column].map(_to_billions)

In [16]:
# Preview the first five rows after the unit conversion
df2.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
1,BTC-USD,Bitcoin USD,60508.26,250.5,0.42,1142.0,40.153,40.153,40.153,0.018876
2,ETH-USD,Ethereum USD,4242.35,-15.62,-0.37,502.214,23.081,23.081,23.081,0.118381
3,BNB-USD,BinanceCoin USD,580.43,-5.16,-0.88,96.816,2.865,2.865,2.865,0.166801
4,USDT-USD,Tether USD,1.0007,0.0008,0.08,73.815,85.984,85.984,85.984,73.761
5,SOL1-USD,Solana USD,215.03,-8.45,-3.78,65.255,3.143,3.143,3.143,0.30347


In [17]:
# Give the positionally numbered columns their real names

new_cols = [
    'Symbol', 'Name', 'Price', 'Change', '%Change', 'Market_cap',
    "Volume_currency", "Volume_last24",
    "Total Volume All Currencies (24Hr)", "Circulating Supply",
]

# Pair each new name with the existing column label at the same position
new_names_map = {df2.columns[pos]: label for pos, label in enumerate(new_cols)}
df2 = df2.rename(columns=new_names_map)

In [18]:
# Parse the value columns, which are still stored with "object" dtype, into
# numeric dtype (Symbol and Name stay as text)

numeric_cols = [
    'Price', 'Change', '%Change', 'Market_cap', "Volume_currency",
    "Volume_last24", "Total Volume All Currencies (24Hr)", "Circulating Supply",
]
df2[numeric_cols] = df2[numeric_cols].apply(pd.to_numeric)

In [19]:
# Preview the first five rows with the final column names
df2.head(n=5)

Unnamed: 0,Symbol,Name,Price,Change,%Change,Market_cap,Volume_currency,Volume_last24,Total Volume All Currencies (24Hr),Circulating Supply
1,BTC-USD,Bitcoin USD,60508.26,250.5,0.42,1142.0,40.153,40.153,40.153,0.018876
2,ETH-USD,Ethereum USD,4242.35,-15.62,-0.37,502.214,23.081,23.081,23.081,0.118381
3,BNB-USD,BinanceCoin USD,580.43,-5.16,-0.88,96.816,2.865,2.865,2.865,0.166801
4,USDT-USD,Tether USD,1.0007,0.0008,0.08,73.815,85.984,85.984,85.984,73.761
5,SOL1-USD,Solana USD,215.03,-8.45,-3.78,65.255,3.143,3.143,3.143,0.30347


In [22]:
# Show the dtype of each column to confirm the numeric conversion
df2.dtypes

Symbol                                 object
Name                                   object
Price                                 float64
Change                                float64
%Change                               float64
Market_cap                            float64
Volume_currency                       float64
Volume_last24                         float64
Total Volume All Currencies (24Hr)    float64
Circulating Supply                    float64
dtype: object