In [33]:
# Update all data by scraping data
import downloader

try:
    downloader.update_market_cap()
except Exception as e:
    print(e)
    pass
try:
    downloader.update_coins()
except Exception as e:
    print(e)
    pass

Updating market cap...
Data is already up to date!

Updating coins...
BCH data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/bch.csv
IOTA data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/iota.csv
TRX data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/trx.csv
LTC data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/ltc.csv
ETH data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/eth.csv
ADA data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/ada.csv
DASH data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/dash.csv
BTC data from 03-January-2018 has been successfully updated to 04-January-2018 and written to price/btc.csv
XEM data from 03-January-2018 has been successfully updated to

SystemExit: wallet_users.csv doesn't match url data.

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import re

%matplotlib notebook

#### Load data
Import price data from csv files and add them to a list 

In [None]:
# Load all data from folder
import os

# Location of files
folder = '../price/'
tail = '.csv'
filenames = os.listdir(folder)
    
# Get the supplied coin name
coins = list(map(lambda x: re.sub(tail, '', x).upper(), filenames))

# Load data
df_list = []
for filename in filenames:
    df_list.append(pd.read_csv(folder + filename, delimiter='\t', index_col='Date', parse_dates=True))
    # input(filename + ' loaded.\nEnter to proceed')    # Debug for file read errors.


# Clear up each list
for index, df in enumerate(df_list):
    # Drop all columns except Date and Market Cap 
    df = df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1)
    # Convert Date column datatype to Date
    #df.index = pd.to_datetime(df.index)    
    
    # Convert Market Cap column datatype to numeric
    df['Market Cap'] = df['Market Cap'].str.replace(',', '')    # Remove commas
    df = df[df['Market Cap'].str.contains('-') == False]        # Delete missing data
    df['Market Cap'] = pd.to_numeric(df['Market Cap'])
    
    # Replace previous dataframe
    df_list[index] = df 
    

In [None]:
# Print df info
for index, df in enumerate(df_list):
    print('[' + str(index) + '] Ticker: ' + coins[index])
    df.info()
    print('--'*25)
    print(df.head())
    print('><'*25)

In [None]:
# Load total market cap data
market_cap_df = pd.read_csv('data/total_market_cap.csv', index_col='Date', parse_dates=True)
market_cap_df.head()

Combine all coin dataframes to one single dataframe. The new dataframe is then resampled into weekly mean to fit the market cap data. To better visualize it's relation to benford's law the dataframe is then converted into percentage of total market cap.

In [None]:
len_list = [len(df) for df in df_list]
coin_index = len_list.index(max(len_list))    # Find index of coin with oldest data

# Create new data frame with coins as columns
cc_df = pd.DataFrame([], columns=coins, index=df_list[coin_index].index)
for i in range(0, len(len_list)):
    cc_df[coins[i]] = df_list[i]['Market Cap']
    
cc_df.head()

In [None]:
# Group by group_period as 7D, since market cap data points set at weekly
group_period = '7D'
cc_df = cc_df.resample(group_period).mean()
cc_df.head()

In [None]:
# Create ccp with percentage of total market cap
ccp_df = cc_df[coins].div(market_cap_df['Total Market Cap'], axis=0)
ccp_df = ccp_df[::-1]    # Reverse order of df
ccp_df.head()

Barplot on each coin's individual coin market cap as percentage of total market cap.

In [None]:
# Function to add percentage text on top of each bar in a barplot
def add_bar_percentage(ax):
    for r in ax.patches:
        height = r.get_height()
        ax.text(r.get_x()+r.get_width()/2.,    # x-pos of text
                height + 0.01,                 # y-pos of text
                '{:3.1f}%'.format(height*100), # text string
                ha="center") 

In [None]:
# Gather benford's distribution percentages
num_benfords = 10
benfords = [np.log10(1+1/i) for i in range(1, num_benfords + 1)]
x_ben = [x for x in range(0, num_benfords)]

In [None]:
# Plot data for Dec 2017
from matplotlib.dates import *

row = 0    # Data row to plot
row_data = ccp_df.iloc[row]
row_date = row_data.name.strftime('%d-%b-%Y')
sorted_ = row_data.sort_values(ascending = False)

fig, ax = plt.subplots(figsize=(8,4))
sns.barplot(sorted_.index.values, sorted_, ax=ax)

ax.set_ylim(0, 0.8) # Adjust ylimits

# Set percentage ticks on yaxis
vals = ax.get_yticks()
ax.set_yticklabels(['{:2.0f}%'.format(x*100) for x in vals])
ax.set_axisbelow(True)
ax.autoscale_view()

# Add percentage text ontop of the bars
#add_bar_percentage(ax) 

## Plot benford's diagram
# Plot reference points for benford's diagram
ben, = plt.plot(x_ben, benfords, 'yD', alpha=0.3, zorder=2)
plt.legend([ben], ['Benford\'s distribution'])

# Create smooth interlopation of benfords distr
from scipy.interpolate import spline
x_smooth = np.linspace(0, 9, 300)
benfords_smooth = spline(x_ben, benfords, x_smooth)
# Plot smoothed benfords curve
plt.plot(x_smooth, benfords_smooth, 'y', alpha=0.4)

plt.title('Market Cap ' + row_date)
plt.ylabel('Percentage of total market cap')
plt.show()

In [None]:
# Plot Benford's law as barplot
fig, ax = plt.subplots(figsize=(8,4))
sns.barplot([x for x in range(1, num_benfords + 1)], benfords, palette='Blues_r')

add_bar_percentage(ax)
    
# Change y limits and display as percentages
ax.set_ylim(0, 0.35)  
vals = ax.get_yticks()
ax.set_yticklabels(['{:2.0f}%'.format(x*100) for x in vals])
ax.set_axisbelow(True)
ax.autoscale_view()

plt.title('Benford Distribution')
plt.show()

In [None]:
import plotly.plotly as py
from plotly.graph_objs import *

In [None]:
data = [Bar(x=[x for x in range(0, 10)], y=benfords)]
py.iplot(data)