In [1]:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import re

%matplotlib inline

#### Load data
Import the price data from the CSV files in `price/` and collect one DataFrame per coin in a list.

In [2]:
# Load every per-coin price file from `folder` and reduce each one to a
# numeric 'Market Cap' column indexed by date.
import os

# Location of files
folder = 'price/'
tail = '.csv'

# Keep only files with the expected extension (a stray directory or hidden
# file would otherwise be passed to read_csv) and sort them, because
# os.listdir() returns entries in arbitrary order.
filenames = sorted(f for f in os.listdir(folder) if f.lower().endswith(tail))

# Ticker symbol = file name without its extension, upper-cased.
# os.path.splitext strips only the real extension; the previous re.sub()
# treated '.csv' as an unanchored regex where '.' matches any character.
coins = [os.path.splitext(f)[0].upper() for f in filenames]

# Load data: the files are tab-separated and carry a 'Date' column that
# becomes a parsed DatetimeIndex.
df_list = []
for filename in filenames:
    df_list.append(pd.read_csv(os.path.join(folder, filename),
                               delimiter='\t', index_col='Date', parse_dates=True))

# Clean up each frame
for index, df in enumerate(df_list):
    # Market caps are read as strings with thousands separators: remove commas
    market_cap = df['Market Cap'].str.replace(',', '')

    # Rows containing '-' mark missing data; na=True treats absent values
    # as missing too, so they are dropped rather than kept.
    is_missing = market_cap.str.contains('-', na=True)

    # Convert the remaining values to numbers. Building a new one-column
    # frame (instead of assigning into a filtered slice) avoids pandas'
    # SettingWithCopyWarning and does not depend on which other columns
    # (Open, High, Low, Close, Volume) happen to be present.
    market_cap = pd.to_numeric(market_cap[~is_missing])

    # Replace previous dataframe
    df_list[index] = market_cap.to_frame()
    

In [3]:
# Summarise every loaded frame: ticker header, .info() report, first rows
for index, df in enumerate(df_list):
    header = '[' + str(index) + '] Ticker: ' + coins[index]
    print(header)
    df.info()
    print('--' * 25)      # separator between the info report and the preview
    preview = df.head()
    print(preview)
    print('><' * 25)      # separator between coins

[0] Ticker: BCH
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 134 entries, 2017-12-13 to 2017-08-02
Data columns (total 1 columns):
Market Cap    134 non-null int64
dtypes: int64(1)
memory usage: 2.1 KB
--------------------------------------------------
             Market Cap
Date                   
2017-12-13  27249600000
2017-12-12  24051800000
2017-12-11  22239900000
2017-12-10  23784900000
2017-12-09  24671600000
><><><><><><><><><><><><><><><><><><><><><><><><><
[1] Ticker: BTC
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1691 entries, 2017-12-13 to 2013-04-28
Data columns (total 1 columns):
Market Cap    1691 non-null int64
dtypes: int64(1)
memory usage: 26.4 KB
--------------------------------------------------
              Market Cap
Date                    
2017-12-13  292900000000
2017-12-12  283155000000
2017-12-11  258147000000
2017-12-10  253782000000
2017-12-09  276415000000
><><><><><><><><><><><><><><><><><><><><><><><><><
[2] Ticker: DASH
<class 'panda

In [4]:
# Row counts per coin and the position of the longest series.
# These are no longer used to build cc_df (the longest series is not
# necessarily the oldest one, and its index may lack dates that other coins
# have), but the names are kept in case other cells refer to them.
len_list = [len(df) for df in df_list]
coin_index = len_list.index(max(len_list))

# Create new data frame with coins as columns.
# pd.concat(axis=1) aligns the series on the union of all their dates, so no
# coin loses rows just because another coin's file lacks that date; missing
# combinations become NaN. Sorting newest-first keeps the original row order.
cc_df = pd.concat(
    [df['Market Cap'] for df in df_list],
    axis=1,
    keys=coins,
).sort_index(ascending=False)

cc_df.head()

Unnamed: 0_level_0,BCH,BTC,DASH,ETH,IOTA,LTC,XEM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-12-13,27249600000.0,292900000000,7168440000.0,62105600000.0,13112600000.0,17116500000,4763320000.0,4749580000.0,14437600000.0
2017-12-12,24051800000.0,283155000000,5934340000.0,50286400000.0,11852900000.0,11528600000,4458080000.0,4348820000.0,9772240000.0
2017-12-11,22239900000.0,258147000000,5333190000.0,42389300000.0,11512900000.0,8042020000,3545280000.0,3784310000.0,9164670000.0
2017-12-10,23784900000.0,253782000000,5600430000.0,45501400000.0,12946400000.0,8438840000,4002320000.0,4061590000.0,9512330000.0
2017-12-09,24671600000.0,276415000000,5633100000.0,44005600000.0,14029600000.0,6910360000,6157700000.0,4283550000.0,9768540000.0


In [5]:
# The total-market-cap data is weekly, so average each coin's market cap
# over 7-day bins to put both data sets on the same frequency.
group_period = '7D'
weekly_bins = cc_df.resample(group_period)
cc_df = weekly_bins.mean()
cc_df.head()

Unnamed: 0_level_0,BCH,BTC,DASH,ETH,IOTA,LTC,XEM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-04-28,,1384919000.0,,,,67811510.0,,,
2013-05-05,,1262736000.0,,,,59749810.0,,,
2013-05-12,,1300063000.0,,,,54825500.0,,,
2013-05-19,,1395911000.0,,,,57043030.0,,,
2013-05-26,,1464011000.0,,,,55544510.0,,,


In [6]:
# Read the total market capitalisation series, indexed by its parsed 'Date' column
market_cap_df = pd.read_csv(
    'data/total_market_cap.csv',
    parse_dates=True,
    index_col='Date',
)
market_cap_df.head()

Unnamed: 0_level_0,Total Market Cap
Date,Unnamed: 1_level_1
2013-04-28,1596170359
2013-05-05,1341783003
2013-05-12,1367920068
2013-05-19,1448321940
2013-05-26,1548982054


In [7]:
# Express each coin's market cap as a fraction of the total market cap.
# The division is aligned row-by-row on the date index.
total_cap = market_cap_df['Total Market Cap']
ccp_df = cc_df[coins].divide(total_cap, axis='index')
ccp_df = ccp_df.iloc[::-1]    # Reverse the row order of the frame
ccp_df.head()

Unnamed: 0_level_0,BCH,BTC,DASH,ETH,IOTA,LTC,XEM,XMR,XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-12-10,0.063526,0.710135,0.015689,0.130726,0.03226,0.029454,0.010945,0.01106,0.027993
2017-12-03,0.074217,0.677017,0.01724,0.130914,0.028073,0.017013,0.008879,0.011357,0.028418
2017-11-26,0.090717,0.58292,0.018647,0.156748,0.011475,0.01734,0.00727,0.009737,0.035197
2017-11-19,0.096756,0.577684,0.016948,0.158101,0.010211,0.016685,0.00785,0.009616,0.039082
2017-11-12,0.103357,0.572756,0.01628,0.154614,0.009546,0.017126,0.008599,0.00941,0.040725


In [8]:
# Function to add percentage text on top of each bar in a barplot
def add_bar_percentage(ax, offset=0.01):
    """Write each bar's height as a percentage label just above the bar.

    Parameters
    ----------
    ax : object exposing ``patches`` and ``text`` (e.g. a matplotlib Axes)
        Every patch must provide ``get_x()``, ``get_width()`` and
        ``get_height()``. Heights are fractions: 0.25 is labelled '25.0%'.
    offset : float, optional
        Vertical gap, in data units, between the top of a bar and its label.
        Defaults to 0.01, the value previously hard-coded.

    Bars whose height is NaN are skipped; they would otherwise get a
    'nan%' label placed at a NaN position.
    """
    for r in ax.patches:
        height = r.get_height()
        if np.isnan(height):
            continue
        ax.text(r.get_x() + r.get_width() / 2.,    # x-pos: horizontal centre of the bar
                height + offset,                   # y-pos: slightly above the bar top
                '{:3.1f}%'.format(height * 100),   # text string
                ha="center")

In [9]:
# Plot each coin's share of the total market cap for one row of ccp_df
# (row 0 is the first row of the reversed frame).
# NOTE(review): nothing in this cell uses the wildcard import below; it is
# kept only in case other cells rely on the names it injects. Prefer explicit
# imports in the notebook's import cell.
from matplotlib.dates import *

row = 0
row_data = ccp_df.iloc[row]
row_date = row_data.name.strftime('%d-%b-%Y')       # row label is the date
sorted_ = row_data.sort_values(ascending=False)     # largest share first

fig, ax = plt.subplots(figsize=(8, 4))
# x/y must be passed by keyword: seaborn >= 0.12 rejects them positionally.
sns.barplot(x=sorted_.index.values, y=sorted_.values, ax=ax)

ax.set_ylim(0, 0.8) # Adjust ylimits

# Show the y axis as percentages. A formatter follows whatever ticks the
# locator picks, whereas set_yticklabels() froze the label strings and could
# leave them out of sync with the ticks after a zoom or resize.
ax.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(xmax=1.0, decimals=0))
ax.autoscale_view()

add_bar_percentage(ax) # Add percentage text on top of the bars

ax.set_title('Market Cap ' + row_date)
ax.set_ylabel('Percentage of total market cap')
plt.show()

<IPython.core.display.Javascript object>

In [10]:
# Plot Benford's Law
def benfords_prob(d):
    """Return the Benford's-law probability for leading digit `d`: log10(1 + 1/d)."""
    reciprocal = 1 / d
    return np.log10(1 + reciprocal)

# Expected leading-digit probabilities for the digits 1..num_benfords
num_benfords = 9
digits = list(range(1, num_benfords + 1))
benfords = [benfords_prob(i) for i in digits]

fig, ax = plt.subplots(figsize=(8, 4))
# x/y must be passed by keyword (seaborn >= 0.12 rejects them positionally);
# ax is passed explicitly instead of relying on the current-axes state.
sns.barplot(x=digits, y=benfords, palette='Blues_r', ax=ax)

add_bar_percentage(ax)

# Change y limits and display as percentages. A formatter follows whatever
# ticks the locator picks, whereas set_yticklabels() froze the label strings
# and could leave them out of sync with the ticks after a zoom or resize.
ax.set_ylim(0, 0.35)
ax.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(xmax=1.0, decimals=0))
ax.autoscale_view()

ax.set_title('Benford Distribution')
plt.show()

<IPython.core.display.Javascript object>