# <a href='https://coinmarketcap.com/'>Coinmarketcap</a><br>

### Notice: The contents of this file have been written to downloader.py; this file is now used mostly for testing!

#### Total Market Cap Data
In this section we gather the total market cap data from coinmarketcap. Since coinmarketcap doesn't provide an API or a historical data table for this, we'll have to scrape the page for the data.

In [None]:
import pandas as pd
import requests
import re, sys, os
import numpy as np
from bs4 import BeautifulSoup
import concurrent.futures 

In [None]:
# csv filename
file = 'data/total_market_cap.csv'

# Most recent day a snapshot can be requested for: today - 1 day, since data
# uploads after the day. Computed once so the up-to-date check and the date
# range below always agree with each other.
last_snapshot_day = pd.to_datetime('today') - pd.offsets.Day()

## Read existing data, create new if none found
try:
    df = pd.read_csv(file, parse_dates=True, index_col='Date')
except FileNotFoundError:
    print('File Not Found!\nWriting to ' + file + '...')
    # Create empty dataframe
    df = pd.DataFrame([], columns=['Date', 'Total Market Cap'])
    df = df.set_index('Date')
    # Set first data point at 20130421
    latest_date = pd.to_datetime('20130421')
else:
    # The last row of the stored file is taken as the latest registered date
    latest_date = df.index[-1]

    # Check if latest registered data is up to date. Comparing against
    # last_snapshot_day (rather than against 'today') guarantees that the
    # date range built below contains at least one date whenever we continue.
    if latest_date + pd.offsets.Week() > last_snapshot_day:
        sys.exit('Data is already up to date!') # Interrupt program
    print('Latest data point at: ' + latest_date.strftime('%d-%m-%Y'))

# Create date range (as 'YYYYMMDD' strings) for the weekly historical snapshots
# from one week after the latest date up to the last available day
Date = pd.date_range(start=latest_date + pd.offsets.Week(),
                     end=last_snapshot_day, freq='7D').strftime('%Y%m%d')

# Placeholder with one slot per date; rebound when the results are collected
market_cap = [None]*len(Date)
# Request and return market cap value for given date from web
def get_market_cap(date):
    """Return the total market cap (in whole dollars) of a historical snapshot.

    Parameters
    ----------
    date : str
        Snapshot date appended verbatim to the module-level ``base_url``
        (the caller passes 'YYYYMMDD' strings).

    Returns
    -------
    int
        The value of the ``total-marketcap`` span with ``$`` and ``,`` removed.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    AttributeError
        If the page does not contain the expected container/span.

    Notes
    -----
    ``base_url`` is read from module scope at call time and is reassigned
    elsewhere in this file; it must point at the historical snapshot pages
    when this function runs.
    """
    # A timeout keeps a stalled connection from blocking the worker thread forever
    page = requests.get(base_url + date, timeout=30)
    # Fail with a clear HTTP error instead of an AttributeError while parsing
    # an error page
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    # Only a 'container' div that is a direct child of <body> is considered
    container = soup.find('body').find('div', {'class': 'container'}, recursive=False)
    mcap = container.find('span', {'id': 'total-marketcap'}).text.strip()

    # Extract marketcap value from span
    return int(re.sub(r',|\$', '', mcap))

## Retrieve market cap value in dollars
base_url = 'https://coinmarketcap.com/historical/'

# Date is a pandas Index, so its truth value is ambiguous; test the length instead
if len(Date) == 0:
    # Nothing to fetch: indexing Date[0] below would raise IndexError
    print('No new snapshots available yet!')
else:
    print('Parsing data from {} to {}'.format(Date[0], Date[-1]))
    print('-'*40)
    # 2 Threads seems to be optimal in jupyter
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        # map() yields results in the same order as Date and re-raises any
        # exception from a worker
        market_cap = list(executor.map(get_market_cap, Date))

    ## Create data frame of date
    market_cap_df = pd.DataFrame({'Date': Date, 'Total Market Cap': market_cap})
    market_cap_df.Date = pd.to_datetime(market_cap_df.Date)
    market_cap_df = market_cap_df.set_index('Date')

    ## Write to file
    # Make sure the target directory exists (it may not on a first run)
    out_dir = os.path.dirname(file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    pd.concat([df, market_cap_df]).to_csv(file)
    print('\nTotal Market Cap data has been successfully updated to ' +
          market_cap_df.index[-1].strftime('%d-%m-%Y') + '!')


#### Write/Update data for coins
Coinmarketcap doesn't have an API to retrieve historical data, so we are going to retrieve it ourselves. Using bs4 we are limited to the data shown on the specific page, therefore only data up to one month old is parsed. You can manually download the all-time data via the URL.

In [None]:
# Path constants
folder = 'price/'
tail = '.csv'

# Check for files in given folder, creates folder if it doesn't exist.
# Only files ending in `tail` are kept, and they are sorted because
# os.listdir returns entries in arbitrary order while the manual name list
# below is matched to the files by position.
try:
    filenames = sorted(name for name in os.listdir(folder) if name.endswith(tail))
    folder_not_found = False
except FileNotFoundError:
    print('Folder not found, creating \'' + folder + '\'')
    os.makedirs(folder)
    filenames = []
    folder_not_found = True

# Get coin name and ticker from files/web if file not found
if folder_not_found or len(filenames) < 1:
    # NOTE: get_top_coins is defined further down in this file and must have
    # been executed before this point is reached
    coin_dict = get_top_coins()      # Scrape top 9 coins from coinmarketcap
    coins = list(coin_dict.keys())
    coin_names = list(coin_dict.values())
else:
    # Get existing coin tickers: strip the literal suffix (not a regex match)
    coins = [name[:-len(tail)].upper() for name in filenames]

    # Manual method
    # NOTE(review): presumably one entry per file in `folder`, in alphabetical
    # order of the file names - verify against the folder contents, since zip
    # silently drops unmatched entries
    coin_names = ['cardano', 'bitcoin-cash', 'bitcoin', 'dash', 'ethereum', 'iota', 'litecoin', 'nem', 'monero', 'ripple']
    coin_dict = dict(zip(coins, coin_names))

    # Programmer method
    #coin_dict = get_top_coins(15)
    #coin_names = [coin_dict[coin] for coin in coins]

# Data constants
header = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap']
base_url = 'https://coinmarketcap.com/currencies/'
tail_url = '/historical-data/'

In [None]:
# Get top @nmbr_of_coins coins on coinmarketcap
def get_top_coins(nmbr_of_coins = 9):
    """Scrape the first rows of the coinmarketcap front-page ranking table.

    Parameters
    ----------
    nmbr_of_coins : int, optional
        Number of leading table rows to read (default 9).

    Returns
    -------
    dict
        Maps the link text of each row's currency-symbol link (the ticker) to
        the second-to-last path segment of its href (the coin name), built in
        ticker-sorted order.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    AttributeError
        If the page does not contain the expected table structure.
    """
    # Get main ranking table; the timeout keeps a stalled connection from
    # hanging, and the status check avoids parsing an error page
    page = requests.get('https://coinmarketcap.com/', timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    # Only a 'container' div that is a direct child of <body> is considered
    container = soup.find('body').find('div', {'class': 'container'}, recursive=False)
    table = container.find('table', {'id': 'currencies'})

    # Table body
    rows = table.find('tbody').find_all('tr')[:nmbr_of_coins]

    # Get the (ticker, coin name) pair from each row in table
    pairs = []
    for row in rows:
        link = row.find('span', {'class': 'currency-symbol'}).find('a')
        pairs.append((link.get_text(), link['href'].split('/')[-2]))

    # Return dictionary sorted by coin ticker
    return dict(sorted(pairs))


In [None]:
# Fetch the ticker -> coin name dictionary for the first 50 table rows
a = get_top_coins(50)

In [None]:
# Print every ticker followed by its coin name
for k, v in a.items():
    print(k)
    print(v)

In [None]:
# Retrieve coin historical data from coinmarketcap
def download_coin_data(coin):
    """Download historical data for one coin and write it to its tab-separated file.

    The target file is ``folder + coin.lower() + tail``. If it already exists,
    only the range from the day after its first row's date up to today is
    requested and the new rows are written in front of the stored ones;
    otherwise everything the page returns is written to a new file.

    Parameters
    ----------
    coin : str
        Coin ticker; must be a key of the module-level ``coin_dict``.

    Returns
    -------
    None
        Progress is reported through ``print``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    AttributeError
        If the page does not contain the expected table.

    Notes
    -----
    Reads the module-level ``folder``, ``tail``, ``base_url``, ``tail_url``,
    ``coin_dict`` and ``header``.
    """
    # Load stored data, if none found create new
    file = folder + coin.lower() + tail
    try:
        original_df = pd.read_csv(file, delimiter='\t', index_col='Date', parse_dates=True,
                                  dtype={'Open': str, 'High': str, 'Low': str, 'Close': str})
        file_not_found = False
    except FileNotFoundError:
        file_not_found = True

    url = base_url + coin_dict[coin] + tail_url
    if not file_not_found:
        # The first row is taken as the most recent date
        # (assumes the file is stored newest-first - TODO confirm)
        latest_date = original_df.index[0]

        # Check if data is up to date
        if latest_date + pd.offsets.Day() >= pd.to_datetime('today'):
            print(coin + ' data already up to date!')
            return

        # Only request data from the day after the last stored date
        url += (r'?start=' + (latest_date + pd.offsets.Day()).strftime('%Y%m%d') +
                r'&end=' + pd.to_datetime('today').strftime('%Y%m%d'))

    # Get html data; the timeout keeps a stalled connection from blocking the
    # worker thread, and the status check avoids parsing an error page
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Extract table data from html
    table = soup.find('div', {'class': 'table-responsive'})
    rows = table.find('tbody').find_all('tr')

    data = []
    for row in rows:
        cols = [cell.text.strip() for cell in row.find_all('td')]
        # Skip rows that do not have one cell per header column (e.g. a
        # placeholder row, if the site emits one); they would make the
        # DataFrame construction below fail
        if len(cols) == len(header):
            data.append(cols)

    # Nothing usable was returned: indexing parsed_df.index[0] below would fail
    if not data:
        print(coin + ' has no new data available!')
        return

    # Convert parsed data into data frame
    parsed_df = pd.DataFrame(data, columns=header)
    parsed_df.Date = pd.to_datetime(parsed_df.Date)
    parsed_df = parsed_df.set_index('Date')

    # If no original file
    if file_not_found:
        parsed_df.to_csv(file, sep='\t')
        print(coin + ' data from ' + parsed_df.index[0].strftime('%d-%B-%Y') +
                      ' to ' + parsed_df.index[-1].strftime('%d-%B-%Y') +
                      ' has been successfully written to ' + file)
    # Concat new and original dataframe and write to file
    else:
        pd.concat((parsed_df, original_df)).to_csv(file, sep='\t')
        print(coin + ' data from ' + latest_date.strftime('%d-%B-%Y') +
                      ' has been successfully updated to ' + parsed_df.index[0].strftime('%d-%B-%Y') +
                      ' and written to ' + file)
        

In [None]:
# Download all coin data in coins concurrently
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Keep the futures (mapped to their coin): an exception raised inside a
    # worker is stored on its future and is silently lost unless it is read
    futures = {executor.submit(download_coin_data, coin): coin for coin in coins}
    for future in concurrent.futures.as_completed(futures):
        try:
            future.result()
        except Exception as err:
            # Report the failure and keep going so the other coins still finish
            print('{} download failed: {!r}'.format(futures[future], err))

print('All downloads finished!')

# <a href='https://blockchain.info/'>Blockchain.info</a><br>

The site provides a download URL for all types of data, all in the same CSV format.

In [None]:
import downloader