In [1]:
# Update all data by scraping data
import downloader

try:
    downloader.update_market_cap()
except Exception as e:
    print(e)
    pass
try:
    downloader.update_coins()
except Exception as e:
    print(e)
    pass

Updating market cap...
Data is already up to date!

Updating coins...
DASH data already up to date!
TRX data already up to date!
LTC data already up to date!
ETH data already up to date!
ADA data already up to date!
IOTA data already up to date!
BTC data already up to date!
BCH data already up to date!
XEM data already up to date!
XLM data already up to date!
XMR data already up to date!
XRP data already up to date!
All downloads finished!



In [2]:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import re

%matplotlib notebook

#### Load data
Import price data from csv files and add them to a list 

In [3]:
# Load all data from folder
import os

# Location of files
folder = '../price/'
tail = '.csv'
filenames = os.listdir(folder)
    
# Get the supplied coin name
coins = list(map(lambda x: re.sub(tail, '', x).upper(), filenames))

# Load data
df_list = []
for filename in filenames:
    df_list.append(pd.read_csv(folder + filename, delimiter='\t', index_col='Date', parse_dates=True))
    # input(filename + ' loaded.\nEnter to proceed')    # Debug for file read errors.


# Clear up each list
for index, df in enumerate(df_list):
    # Drop all columns except Date and Market Cap 
    df = df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1)
    # Convert Date column datatype to Date
    #df.index = pd.to_datetime(df.index)    
    
    # Convert Market Cap column datatype to numeric
    df['Market Cap'] = df['Market Cap'].str.replace(',', '')    # Remove commas
    df = df[df['Market Cap'].str.contains('-') == False]        # Delete missing data
    df['Market Cap'] = pd.to_numeric(df['Market Cap'])
    
    # Replace previous dataframe
    df_list[index] = df 
    

In [4]:
# Print df info
for index, df in enumerate(df_list):
    print('[' + str(index) + '] Ticker: ' + coins[index])
    df.info()
    print('--'*25)
    print(df.head())
    print('><'*25)

[0] Ticker: ADA
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 98 entries, 2018-01-08 to 2017-10-03
Data columns (total 1 columns):
Market Cap    98 non-null int64
dtypes: int64(1)
memory usage: 1.5 KB
--------------------------------------------------
             Market Cap
Date                   
2018-01-08  25909700000
2018-01-07  26553200000
2018-01-06  25849300000
2018-01-05  30364400000
2018-01-04  28364900000
><><><><><><><><><><><><><><><><><><><><><><><><><
[1] Ticker: BCH
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 160 entries, 2018-01-08 to 2017-08-02
Data columns (total 1 columns):
Market Cap    160 non-null int64
dtypes: int64(1)
memory usage: 2.5 KB
--------------------------------------------------
             Market Cap
Date                   
2018-01-08  47091600000
2018-01-07  47054000000
2018-01-06  43653200000
2018-01-05  40557600000
2018-01-04  44069100000
><><><><><><><><><><><><><><><><><><><><><><><><><
[2] Ticker: BTC
<class 'pandas.core.frame.

In [5]:
# Load total market cap data
market_cap_df = pd.read_csv('../data/total_market_cap.csv', index_col='Date', parse_dates=True)
market_cap_df.head()

Unnamed: 0_level_0,Total Market Cap
Date,Unnamed: 1_level_1
2013-04-28,1596170359
2013-05-05,1341783003
2013-05-12,1367920068
2013-05-19,1448321940
2013-05-26,1548982054


Combine all coin dataframes to one single dataframe. The new dataframe is then resampled into weekly mean to fit the market cap data. To better visualize it's relation to benford's law the dataframe is then converted into percentage of total market cap.

In [6]:
len_list = [len(df) for df in df_list]
coin_index = len_list.index(max(len_list))    # Find index of coin with oldest data

# Create new data frame with coins as columns
cc_df = pd.DataFrame([], columns=coins, index=df_list[coin_index].index)
for i in range(0, len(len_list)):
    cc_df[coins[i]] = df_list[i]['Market Cap']
    
cc_df.head()

Unnamed: 0_level_0,ADA,BCH,BTC,DASH,ETH,IOTA,LTC,TRX,XEM,XLM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-08,25909700000.0,47091600000.0,276612000000,10016200000.0,112164000000.0,11314500000.0,15726800000,13491200000.0,16454000000.0,12442100000.0,7139010000.0,130302000000.0
2018-01-07,26553200000.0,47054000000.0,294222000000,9881510000.0,100981000000.0,10968300000.0,16178100000,11419000000.0,14898800000.0,12688400000.0,7103000000.0,119819000000.0
2018-01-06,25849300000.0,43653200000.0,293091000000,9026110000.0,96326500000.0,10436900000.0,13645300000,14760400000.0,14216700000.0,11854000000.0,5999260000.0,117033000000.0
2018-01-05,30364400000.0,40557600000.0,259748000000,9527170000.0,94423900000.0,11286600000.0,13164800000,13779500000.0,15195600000.0,13219400000.0,6262550000.0,127870000000.0
2018-01-04,28364900000.0,44069100000.0,256250000000,9151080000.0,93049400000.0,11466400000.0,13403400000,6240820000.0,16520300000.0,15953600000.0,6448110000.0,120763000000.0


In [7]:
# Group by group_period as 7D, since market cap data points set at weekly
group_period = '7D'
cc_df = cc_df.resample(group_period).mean()
cc_df.head()

Unnamed: 0_level_0,ADA,BCH,BTC,DASH,ETH,IOTA,LTC,TRX,XEM,XLM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2013-04-28,,,1384919000.0,,,,67811510.0,,,,,
2013-05-05,,,1262736000.0,,,,59749810.0,,,,,
2013-05-12,,,1300063000.0,,,,54825500.0,,,,,
2013-05-19,,,1395911000.0,,,,57043030.0,,,,,
2013-05-26,,,1464011000.0,,,,55544510.0,,,,,


In [8]:
# Create ccp with percentage of total market cap
ccp_df = cc_df[coins].div(market_cap_df['Total Market Cap'], axis=0)
ccp_df = ccp_df[::-1]    # Reverse order of df
ccp_df.head()

Unnamed: 0_level_0,ADA,BCH,BTC,DASH,ETH,IOTA,LTC,TRX,XEM,XLM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-07,0.031896,0.057238,0.347051,0.012097,0.129586,0.013547,0.019397,0.015145,0.019062,0.015279,0.008659,0.152066
2017-12-31,0.039647,0.07435,0.434708,0.015312,0.146262,0.018648,0.022744,0.01215,0.020765,0.017985,0.01018,0.180424
2017-12-24,0.052423,0.109015,0.27437,0.018376,0.27307,0.022977,0.049785,0.004832,0.016601,0.007995,0.013713,0.068307
2017-12-17,0.020938,0.076287,0.494514,0.016296,0.125222,0.020778,0.029391,0.004795,0.012927,0.007698,0.010045,0.057782
2017-12-10,0.010283,0.07025,0.722664,0.016856,0.149556,0.030611,0.034686,0.001813,0.012045,0.007308,0.011755,0.046163


Barplot on each coin's individual coin market cap as percentage of total market cap.

In [9]:
# Function to add percentage text on top of each bar in a barplot
def add_bar_percentage(ax):
    for r in ax.patches:
        height = r.get_height()
        ax.text(r.get_x()+r.get_width()/2.,    # x-pos of text
                height + 0.01,                 # y-pos of text
                '{:3.1f}%'.format(height*100), # text string
                ha="center") 

In [10]:
# Gather benford's distribution percentages
num_benfords = 10
benfords = [np.log10(1+1/i) for i in range(1, num_benfords + 1)]
x_ben = [x for x in range(0, num_benfords)]

In [11]:
# Plot data for Dec 2017
from matplotlib.dates import *

row = 0    # Data row to plot
row_data = ccp_df.iloc[row]
row_date = row_data.name.strftime('%d-%b-%Y')
sorted_ = row_data.sort_values(ascending = False)

fig, ax = plt.subplots(figsize=(8,4))
sns.barplot(sorted_.index.values[:10], sorted_[:10], ax=ax)

ax.set_ylim(0, 0.8) # Adjust ylimits

# Set percentage ticks on yaxis
vals = ax.get_yticks()
ax.set_yticklabels(['{:2.0f}%'.format(x*100) for x in vals])
ax.set_axisbelow(True)
ax.autoscale_view()

# Add percentage text ontop of the bars
#add_bar_percentage(ax) 

## Plot benford's diagram
# Plot reference points for benford's diagram
ben, = plt.plot(x_ben, benfords, 'yD', alpha=0.6, zorder=2)
plt.legend([ben], ['Benford\'s distribution'])

# Create smooth interlopation of benfords distr
from scipy.interpolate import spline
x_smooth = np.linspace(0, 9, 300)
benfords_smooth = spline(x_ben, benfords, x_smooth)
# Plot smoothed benfords curve
plt.plot(x_smooth, benfords_smooth, 'y', alpha=0.6)

plt.title('Market Cap ' + row_date)
plt.ylabel('Percentage of total market cap')
#plt.savefig(str('mrktcap_' + row_data.name.strftime('%Y%m%d')))

<IPython.core.display.Javascript object>

spline is deprecated in scipy 0.19.0, use Bspline class instead.


In [12]:
# Plot Benford's law as barplot
fig, ax = plt.subplots(figsize=(8,4))
sns.barplot([x for x in range(1, num_benfords + 1)], benfords, palette='Blues_r')

add_bar_percentage(ax)
    
# Change y limits and display as percentages
ax.set_ylim(0, 0.35)  
vals = ax.get_yticks()
ax.set_yticklabels(['{:2.0f}%'.format(x*100) for x in vals])
ax.set_axisbelow(True)
ax.autoscale_view()

plt.title('Benford Distribution')
plt.show()

<IPython.core.display.Javascript object>