Web scraping of cryptocurrency data from Coin Market Cap

Source: [Coin Market Cap](https://coinmarketcap.com)

Tutorial: [Data Science Web Apps Tutorial by Data Professor](https://www.youtube.com/watch?v=JwSS70SZdyM&list=PLRpb1EfB9cjuOFw_ZVmeqlCgdhYjdbBMO&index=13&ab_channel=freeCodeCamp.org)

# Web Scraping

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import json

In [2]:
# Download the CoinMarketCap home page. The timeout keeps the cell from
# hanging forever if the server never answers, and raise_for_status()
# surfaces HTTP errors here instead of as a confusing parsing failure below.
cmc = requests.get('https://coinmarketcap.com', timeout=30)
cmc.raise_for_status()
soup = BeautifulSoup(cmc.content, 'html.parser')

# The listing data is read from the JSON payload of the
# <script id="__NEXT_DATA__" type="application/json"> tag.
data = soup.find('script', id='__NEXT_DATA__', type='application/json')

coin_data = json.loads(data.contents[0])
listings = coin_data['props']['initialState']['cryptocurrency']['listingLatest']['data']

In [7]:
# Display the first listing to inspect the keys available for each coin
listings[0]

{'id': 1,
 'name': 'Bitcoin',
 'symbol': 'BTC',
 'slug': 'bitcoin',
 'tags': ['mineable',
  'pow',
  'sha-256',
  'store-of-value',
  'state-channels',
  'coinbase-ventures-portfolio',
  'three-arrows-capital-portfolio',
  'polychain-capital-portfolio',
  'binance-labs-portfolio',
  'arrington-xrp-capital',
  'blockchain-capital-portfolio',
  'boostvc-portfolio',
  'cms-holdings-portfolio',
  'dcg-portfolio',
  'dragonfly-capital-portfolio',
  'electric-capital-portfolio',
  'fabric-ventures-portfolio',
  'framework-ventures',
  'galaxy-digital-portfolio',
  'huobi-capital',
  'alameda-research-portfolio',
  'a16z-portfolio',
  '1confirmation-portfolio',
  'winklevoss-capital',
  'usv-portfolio',
  'placeholder-ventures-portfolio',
  'pantera-capital-portfolio',
  'multicoin-capital-portfolio',
  'paradigm-xzy-screener'],
 'cmcRank': 1,
 'marketPairCount': 9187,
 'circulatingSupply': 18746181,
 'totalSupply': 18746181,
 'maxSupply': 21000000,
 'ath': 64863.0989077,
 'atl': 65.52600098,

The website has renamed some of the keys in its JSON data (the quote fields are now camelCase, e.g. `percentChange1h`), so the original code from the tutorial no longer works.

In [8]:
def _extract_listings(html):
    """Return the coin listings embedded in the CoinMarketCap page HTML."""
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.find('script', id='__NEXT_DATA__', type='application/json')
    if data is None or not data.contents:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "Could not find the '__NEXT_DATA__' JSON script tag in the page; "
            "the site layout may have changed or the request was blocked."
        )
    coin_data = json.loads(data.contents[0])
    return coin_data['props']['initialState']['cryptocurrency']['listingLatest']['data']


def _listings_to_frame(listings, currency_price_unit):
    """Flatten the listings into a DataFrame with one row per coin."""
    columns = ['coin_name', 'coin_symbol', 'market_cap', 'percent_change_1h',
               'percent_change_24h', 'percent_change_7d', 'price', 'volume_24h']
    rows = []
    for listing in listings:
        quote = listing['quote'][currency_price_unit]
        rows.append({
            'coin_name': listing['slug'],
            'coin_symbol': listing['symbol'],
            'price': quote['price'],
            # The site exposes the quote fields under camelCase key names.
            'percent_change_1h': quote['percentChange1h'],
            'percent_change_24h': quote['percentChange24h'],
            'percent_change_7d': quote['percentChange7d'],
            'market_cap': quote['marketCap'],
            'volume_24h': quote['volume24h'],
        })
    # Passing `columns` fixes the column order and keeps the columns present
    # even when there are no listings.
    return pd.DataFrame(rows, columns=columns)


def load_data():
    """Scrape the CoinMarketCap home page into a DataFrame of coin quotes.

    Downloads https://coinmarketcap.com, reads the JSON payload of the
    ``__NEXT_DATA__`` script tag and extracts the USD quote of every listing.

    Returns:
        pandas.DataFrame: one row per listing with the columns ``coin_name``
        (the listing's slug), ``coin_symbol``, ``market_cap``,
        ``percent_change_1h``, ``percent_change_24h``, ``percent_change_7d``,
        ``price`` and ``volume_24h``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the ``__NEXT_DATA__`` script tag is missing or empty.
        KeyError: if the JSON payload lacks one of the expected keys.
    """
    currency_price_unit = 'USD'

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    cmc = requests.get('https://coinmarketcap.com', timeout=30)
    # Surface HTTP errors here rather than as a parsing failure further down.
    cmc.raise_for_status()

    listings = _extract_listings(cmc.content)
    return _listings_to_frame(listings, currency_price_unit)

In [9]:
# Scrape the current listings into a DataFrame and display it
df = load_data()
df

Unnamed: 0,coin_name,coin_symbol,market_cap,percent_change_1h,percent_change_24h,percent_change_7d,price,volume_24h
0,bitcoin,BTC,6.241803e+11,-0.136898,-0.674186,-2.931153,33296.396960,3.304289e+10
1,ethereum,ETH,2.387977e+11,-0.451891,-3.773189,5.770154,2049.307398,2.503999e+10
2,tether,USDT,6.240191e+10,0.004256,0.018275,0.022471,1.000449,5.032566e+10
3,binance-coin,BNB,4.294408e+10,-0.583587,-2.597234,-5.113145,279.888372,1.269051e+09
4,cardano,ADA,4.195351e+10,0.297806,-0.581604,-2.196197,1.313250,1.879520e+09
...,...,...,...,...,...,...,...,...
95,0x,ZRX,5.631966e+08,-0.782310,-7.890894,2.697203,0.666250,4.512286e+07
96,fantom,FTM,5.599718e+08,-0.412596,-1.526639,-3.446910,0.220028,3.508396e+07
97,omg,OMG,5.540938e+08,-0.488088,-4.721405,4.278313,3.950887,1.536432e+08
98,husd,HUSD,5.540388e+08,-0.017008,0.024091,0.016867,1.000258,4.516387e+08


In [11]:
# Keep only the symbol and the three percent-change columns, indexed by symbol.
df_change = (
    df[['coin_symbol', 'percent_change_1h', 'percent_change_24h', 'percent_change_7d']]
    .set_index('coin_symbol')
)
# Add one boolean column per time window: True where the change is above zero.
df_change = df_change.assign(
    positive_percent_change_1h=df_change['percent_change_1h'].gt(0),
    positive_percent_change_24h=df_change['percent_change_24h'].gt(0),
    positive_percent_change_7d=df_change['percent_change_7d'].gt(0),
)
df_change

Unnamed: 0_level_0,percent_change_1h,percent_change_24h,percent_change_7d,positive_percent_change_1h,positive_percent_change_24h,positive_percent_change_7d
coin_symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BTC,-0.136898,-0.674186,-2.931153,False,False,False
ETH,-0.451891,-3.773189,5.770154,False,False,True
USDT,0.004256,0.018275,0.022471,True,True,True
BNB,-0.583587,-2.597234,-5.113145,False,False,False
ADA,0.297806,-0.581604,-2.196197,True,False,False
...,...,...,...,...,...,...
ZRX,-0.782310,-7.890894,2.697203,False,False,True
FTM,-0.412596,-1.526639,-3.446910,False,False,False
OMG,-0.488088,-4.721405,4.278313,False,False,True
HUSD,-0.017008,0.024091,0.016867,False,True,True
