# <a href='https://coinmarketcap.com/'>Coinmarketcap</a><br>

#### Total Market Cap Data
In this section we gather the total market cap data from CoinMarketCap. Since CoinMarketCap doesn't provide an API or a historical data table for this, we'll have to crawl the page data.


In [1]:
import pandas as pd
import requests
import re
import numpy as np
import sys
from bs4 import BeautifulSoup

In [2]:
# csv filename
file = 'data/total_market_cap.csv'

# Read existing data, create new if none found
try:
    df = pd.read_csv(file, parse_dates=True, index_col='Date')
except FileNotFoundError:
    print('File Not Found!\nWriting to ' + file + '...')
    df = pd.DataFrame([], columns=['Date', 'Total Market Cap'])
    df = df.set_index('Date')
    # One week before the first requested snapshot: the date range below
    # starts at latest_date + 1 week, i.e. at 20130428
    latest_date = pd.to_datetime('20130421')
else:
    latest_date = df.index[-1]

    # Snapshots are collected weekly, so the stored data is current until a
    # full week has passed since the last stored date
    if latest_date + pd.offsets.Week() > pd.to_datetime('today'):
        sys.exit('Data is already up to date!') # Interrupt program
    print('Latest data point at: ' + latest_date.strftime('%d-%B-%Y'))

# Create date range for historical snapshots from latest date to today
Date = pd.date_range(start=latest_date+pd.offsets.Week(), end='today', freq='7D').strftime('%Y%m%d')

# Retrieve market cap value in dollars
market_cap = []
base_url = 'https://coinmarketcap.com/historical/'
for date in Date:
    # Retrieve historical snapshot data from date
    page = requests.get(base_url + date, timeout=30)
    # Fail here on an HTTP error instead of trying to parse an error page
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    # Extract marketcap value from span
    span = soup.find('span', {'id' : 'total-marketcap'})
    if span is None:
        raise ValueError('No total market cap found in snapshot ' + date)
    # Strip the thousands separators and the dollar sign before converting
    market_cap.append(int(re.sub(r',|\$', '', span.text.strip())))

# Create data frame of data
market_cap_df = pd.DataFrame({'Date':Date, 'Total Market Cap':market_cap})
market_cap_df.Date = pd.to_datetime(market_cap_df.Date, format='%Y%m%d')
market_cap_df = market_cap_df.set_index('Date')

# Write to file
# DataFrame.append was removed in pandas 2.0, so use concat; with no stored
# data yet the new frame is written on its own
updated_df = market_cap_df if df.empty else pd.concat([df, market_cap_df])
updated_df.to_csv(file)
print('\nTotal Market Cap data has been successfully updated to ' +
      market_cap_df.index[-1].strftime('%d-%B-%Y') + '!')


SystemExit: Data is already up to date!

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


#### Write/Update data for coins
CoinMarketCap doesn't have an API to retrieve historical data, so we are going to do it ourselves. Using bs4 we are limited to the data shown on the specific page, therefore only data up to one month old is parsed. You can manually download the all-time data via the URL.

In [3]:
import os
folder = 'price/'
tail = '.csv'
# os.listdir returns entries in arbitrary order and may contain unrelated
# entries, so keep only the csv files and sort them for a stable order
filenames = sorted(entry for entry in os.listdir(folder) if entry.endswith(tail))

# Get coin name of files (ticker symbol = file name without the extension)
coins = [entry[:-len(tail)].upper() for entry in filenames]

# Ticker symbol -> coinmarketcap url slug. Spelled out explicitly so the
# mapping does not depend on which files exist or on their listing order.
coin_dict = {
    'ADA': 'cardano',
    'BCH': 'bitcoin-cash',
    'BTC': 'bitcoin',
    'DASH': 'dash',
    'ETH': 'ethereum',
    'IOTA': 'iota',
    'LTC': 'litecoin',
    'XEM': 'nem',
    'XMR': 'monero',
    'XRP': 'ripple',
}
coin_name = list(coin_dict.values())

# Data constants
header = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap']
base_url = 'https://coinmarketcap.com/currencies/'
tail_url = '/historical-data/'

In [4]:
# Go through and update all coin data
for coin in coins:
    print('-'*40)
    print('Parsing ' + coin + ' data...')
    # Load stored data, if none found create new
    file = folder + coin.lower() + tail
    try:
        # Prices are kept as strings so they are written back unchanged
        original_df = pd.read_csv(file, delimiter='\t', index_col='Date', parse_dates=True,
                                  dtype={'Open':str, 'High':str, 'Low':str, 'Close':str})
    except FileNotFoundError:
        print('File not found!\nCreating ' + coin + tail)
        file_not_found = True
    else:
        file_not_found = False
        # Rows are stored newest first, so the first index entry is the latest date
        latest_date = original_df.index[0]

        # Check if data is up to date
        if latest_date + pd.offsets.Day() >= pd.to_datetime('today'):
            print(coin + ' data already up to date!')
            continue
        print('Latest data point at: ' + latest_date.strftime('%Y-%m-%d'))

    # Get html data
    url = base_url + coin_dict[coin] + tail_url
    if not file_not_found:
        # Only request the days that are missing from the stored file
        url += (r'?start=' + (latest_date + pd.offsets.Day()).strftime('%Y%m%d') +
                r'&end=' + pd.to_datetime('today').strftime('%Y%m%d'))
    page = requests.get(url, timeout=30)
    # Fail here on an HTTP error instead of trying to parse an error page
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Extract table data from html
    table = soup.find('div', {'class':'table-responsive'})
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        raise ValueError('No historical data table found at ' + url)

    data = []
    for row in table_body.find_all('tr'):
        cols = [e.text.strip() for e in row.find_all('td')]
        # Rows whose cell count differs from the header (e.g. a placeholder
        # row when the requested range holds no data) cannot be loaded
        if len(cols) == len(header):
            data.append(cols)

    if not data:
        print('No new data available for ' + coin + '.')
        continue

    # Convert parsed data into data frame
    parsed_df = pd.DataFrame(data, columns=header)
    parsed_df.Date = pd.to_datetime(parsed_df.Date)
    parsed_df = parsed_df.set_index('Date')

    # If no original file
    if file_not_found:
        parsed_df.to_csv(file, sep='\t')
        # Newest row comes first, so the oldest date is the last index entry
        print(coin + ' data from ' + parsed_df.index[-1].strftime('%d-%B-%Y') +
                      ' to ' + parsed_df.index[0].strftime('%d-%B-%Y') +
                      ' has been successfully written to ' + file)
    # Concat new and original dataframe and write to file
    else:
        # New rows go on top to keep the newest-first ordering of the file
        pd.concat((parsed_df, original_df)).to_csv(file, sep='\t')
        print(coin + ' data from ' + latest_date.strftime('%d-%B-%Y') +
                      ' has been successfully updated to ' + parsed_df.index[0].strftime('%d-%B-%Y') +
                      ' and written to ' + file)

----------------------------------------
Parsing ADA data...
ADA data already up to date!
----------------------------------------
Parsing BCH data...
BCH data already up to date!
----------------------------------------
Parsing BTC data...
BTC data already up to date!
----------------------------------------
Parsing DASH data...
DASH data already up to date!
----------------------------------------
Parsing ETH data...
ETH data already up to date!
----------------------------------------
Parsing IOTA data...
IOTA data already up to date!
----------------------------------------
Parsing LTC data...
LTC data already up to date!
----------------------------------------
Parsing XEM data...
XEM data already up to date!
----------------------------------------
Parsing XMR data...
XMR data already up to date!
----------------------------------------
Parsing XRP data...
XRP data already up to date!


# <a href='https://blockchain.info/'>Blockchain.info</a><br>

The site provides a download URL for all types of data in the same CSV format.

In [5]:
# File updater function for data from blockchain.info
def blockchain_file_update(filename, url, folder='data/'):
    """Replace a local csv file with the copy behind ``url`` when that is newer.

    Both the local file and the url are read as header-less, two-column csv
    data (date, value). The file is only overwritten when the url data ends
    at a later date than the local data and the rows already stored locally
    agree with the url data.

    Args:
        filename: Name of the csv file inside ``folder``.
        url: Download link for the csv data.
        folder: Prefix prepended to ``filename`` as-is, so it needs its
            trailing slash.

    Raises:
        SystemExit: If the locally stored rows differ from the url data.
        requests.HTTPError: If the download request returns an error status.
    """
    path = folder + filename
    columns = ['Date', 'Data']

    # Load current existing data
    df = pd.read_csv(path, names=columns, index_col='Date', parse_dates=True)
    url_df = pd.read_csv(url, names=columns, index_col='Date', parse_dates=True)

    # Nothing to do unless the url data reaches past the stored data
    if not df.index[-1] < url_df.index[-1]:
        print(filename + ' already up to date!')
        return

    # Check if current file data matches url data. DataFrame.isin with a
    # DataFrame argument compares values at matching index/column labels, so
    # every stored row has to reappear unchanged in the url data.
    overlap = url_df.loc[:df.index[-1]]
    if not df.isin(overlap).all().all():
        sys.exit(filename + ' doesn\'t match url data.')

    # Download url from blockchain.info
    r = requests.get(url, allow_redirects=True, timeout=30)
    # Never overwrite the good local file with an HTTP error body
    r.raise_for_status()
    with open(path, 'wb') as out_file:
        out_file.write(r.content)
    print(filename + ' has been successfully updated!')

In [6]:
# Local csv file name -> blockchain.info download link
blockchain_sources = {
    'wallet_users.csv': 'https://blockchain.info/charts/my-wallet-n-users?timespan=all&format=csv',
    'hash_rate_raw.csv': 'https://api.blockchain.info/charts/hash-rate?timespan=all&format=csv',
}
filename = list(blockchain_sources)
url = list(blockchain_sources.values())

# Refresh each local file from its download link
for f, u in blockchain_sources.items():
    blockchain_file_update(f, u)

wallet_users.csv already up to date!
hash_rate_raw.csv already up to date!
