# Stylized Facts of Cryptocurrency

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import glob
import re
from statsmodels.tsa.stattools import acf, pacf

# Default size (width, height) applied to every matplotlib figure created below.
plt.rcParams['figure.figsize'] = (15.0, 5.0)

In [None]:
def log_returns(prices):
    """Return the log returns ``ln(p_t / p_{t-1})`` of a price series.

    Args:
        prices: ``pandas.Series`` of prices, already in chronological order.

    Returns:
        A ``pandas.Series`` on the same index with NaN entries dropped; the
        first observation is always dropped because it has no predecessor.
    """
    return np.log(prices / prices.shift()).dropna()


# Compiled once. The dot is escaped and either path separator is accepted so
# the coin name is still extracted when glob returns backslash-separated paths.
price_file_pattern = re.compile(r'.*[\\/](.*)_price\.csv$')

# Glob on '*_price.csv' so that every hit is guaranteed to match the pattern
# above (a bare '*price.csv' hit without the underscore would yield None).
files = sorted(glob.glob('../data/*_price.csv'))
coin_names = [price_file_pattern.match(f).group(1) for f in files]

# coin name -> Series of daily log returns of the 'Close' column, by date.
returns_map = {}
for coin, file in zip(coin_names, files):
    df = pd.read_csv(file)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values(by='Date').set_index('Date')
    returns_map[coin] = log_returns(df['Close'])

# Built in one step instead of being filled through chained indexing
# (`frame[col][0] = ...`), which writes to a temporary under copy-on-write.
data_sizes = pd.DataFrame(
    {coin: [coin_returns.size] for coin, coin_returns in returns_map.items()},
    index=['size'],
)
data_sizes

## Basic Statistics
* Q1: What does A* mean? Is it related to another version of the central limit theorem?
* Q2: The z-score here might be meaningless, since it is only used to test the null hypothesis that the expected return is zero.

In [None]:
# Per-coin summary statistics of the daily log returns in `returns_map`.
columns = ['10^4 mean', '10^2 std', 'max', 'min', 'skew', 'skew_abs', 'kurtosis', 'G%', 'A*%', 'z']
df = pd.DataFrame(index=coin_names, dtype='float', columns=columns)
N = 365  # number of daily periods used to scale G and A* below
for key, returns in returns_map.items():
    # Compute each moment once instead of re-evaluating it per column.
    mu = returns.mean()
    sigma = returns.std()
    skew = returns.skew()

    # numpy is used throughout: `math` is never imported in this notebook,
    # so the former math.exp / math.pow / math.sqrt calls raised NameError.
    G = np.exp(N * mu) - 1  # compounded mean log return over N periods
    Astar = (1 + G) * np.exp(N * 0.5 * sigma ** 2) - 1  # G scaled by exp(N * var / 2)
    z = mu * np.sqrt(returns.size) / sigma  # mean divided by its standard error

    df.loc[key] = [mu * 10000, sigma * 100,
                   returns.max(), returns.min(),
                   skew, abs(skew), returns.kurtosis(),
                   G * 100, Astar * 100,
                   z]
df.round(2)

In [None]:
# Pairwise scatter plots of the summary statistics, one labelled point per coin.
# `table` was never defined; it is bound here to the statistics frame `df`
# built by the preceding cell, and the axis names below use that frame's
# actual column labels ('10^4 mean' / '10^2 std' rather than 'mean' / 'std').
table = df
axis_pairs = [
    ('kurtosis', 'skew_abs'),
    ('kurtosis', '10^2 std'),
    ('kurtosis', '10^4 mean'),
    ('10^4 mean', '10^2 std'),
    ('min', 'max'),
]
for x_col, y_col in axis_pairs:
    ax = table.plot.scatter(x=x_col, y=y_col)
    # Tag every point with the coin name taken from the frame's index.
    for coin, x_val, y_val in zip(table.index, table[x_col], table[y_col]):
        ax.annotate(coin, (x_val, y_val))
    plt.show()

## Shape of Returns Distribution
Q: What thresholds of skewness and kurtosis are consistent with a normal distribution?

In [None]:
# Skewness and kurtosis of each coin's returns together with z-scores that
# divide each statistic by sqrt(6/n) and sqrt(24/n) respectively (the usual
# large-sample standard errors under a normal null).
columns = ['std', 'skew', 'skew std', 'z-skew', 'kurtosis', 'kurtosis std', 'z-kurtosis']
df = pd.DataFrame(index=coin_names, dtype='float', columns=columns)
for key, returns in returns_map.items():
    n_obs = returns.size
    skew = returns.skew()
    kurtosis = returns.kurtosis()
    # np.sqrt replaces math.sqrt: `math` is never imported in this notebook.
    skew_std = np.sqrt(6.0 / n_obs)
    kurto_std = np.sqrt(24.0 / n_obs)
    df.loc[key] = [returns.std(),
                   skew, skew_std, abs(skew) / skew_std,  # z-skew is unsigned
                   kurtosis, kurto_std, kurtosis / kurto_std]
df.round(2)

### Example shape graph

In [None]:
# Compare the empirical return distribution of one coin with a normal
# density that has the same mean and standard deviation.
returns = returns_map['bitconnect']
mu = returns.mean()
sigma = returns.std()

# `kind` must be passed by keyword: Series.plot rejects positional arguments.
returns.plot(kind='kde')
x = np.linspace(returns.min(), returns.max(), returns.size)
# Normal pdf written out with numpy; matplotlib.mlab.normpdf no longer exists.
normal_pdf = np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2.0 * np.pi))
plt.plot(x, normal_pdf)
# Histogram counts go on a secondary y-axis so they do not dwarf the densities.
returns.plot(kind='hist', bins=40, secondary_y=True, alpha=0.5)
plt.show()

# Time series of the same returns.
returns.plot()
plt.show()

## Calendar Effects

### Average percentage day-of-week returns

In [None]:
# Mean return per weekday for every coin, scaled by 10^4.
columns = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df = pd.DataFrame(index=coin_names, dtype='float', columns=columns)

for key, returns in returns_map.items():
    weekday_mean = returns.groupby(returns.index.dayofweek).mean()
    # dayofweek is 0 (Monday) .. 6 (Sunday). Reindexing guarantees seven
    # values in column order even when a coin has no data for some weekday;
    # the missing day becomes NaN instead of raising a length mismatch.
    df.loc[key] = 10000 * weekday_mean.reindex(range(7)).values

print('Average percentage daily 10^4 returns: ')
df.round(2)

### Average percentage day-of-week std

In [None]:
# Standard deviation of returns per weekday for every coin, scaled by 10^2.
columns = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df = pd.DataFrame(index=coin_names, dtype='float', columns=columns)

for key, returns in returns_map.items():
    weekday_std = returns.groupby(returns.index.dayofweek).std()
    # dayofweek is 0 (Monday) .. 6 (Sunday). Reindexing guarantees seven
    # values in column order even when a coin has no data for some weekday;
    # the missing day becomes NaN instead of raising a length mismatch.
    df.loc[key] = 100 * weekday_std.reindex(range(7)).values

print('Average percentage daily 10^2 std: ')
df.round(2)

In [None]:
# Summary statistics of bitcoin returns split by weekday: one row per
# statistic, one column per weekday.
columns = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Row labels were previously taken from an undefined name `index`.
index = ['10^4 mean', '10^2 std', 'max', 'min', 'skew', 'kurtosis', 'G%', 'A*%', 'z']
df = pd.DataFrame(index=index, dtype='float', columns=columns)

returns = returns_map['bitcoin']
groups = returns.groupby(returns.index.dayofweek)


# numpy replaces math.* below: `math` is never imported in this notebook.
# N is the period count defined in the basic-statistics cell.
def calG(x):
    """Return exp(N * mean(x)) - 1 for one group of returns."""
    return np.exp(N * x.mean()) - 1


def calAstar(x):
    """Return (1 + G) * exp(N * var(x) / 2) - 1 for one group of returns."""
    return (1 + calG(x)) * np.exp(N * 0.5 * x.std() ** 2) - 1


def calz(x):
    """Return mean(x) * sqrt(len(x)) / std(x) for one group of returns."""
    return x.mean() * np.sqrt(x.size) / x.std()


df.loc['10^4 mean'] = 10000 * groups.mean().values
df.loc['10^2 std'] = 100 * groups.std().values
df.loc['max'] = groups.max().values
df.loc['min'] = groups.min().values
df.loc['skew'] = groups.skew().values
df.loc['kurtosis'] = groups.apply(lambda x: x.kurtosis()).values
# Scaled by 100 so the rows really are percentages, matching the 'G%' and
# 'A*%' columns of the per-coin statistics table.
df.loc['G%'] = 100 * groups.apply(calG).values
df.loc['A*%'] = 100 * groups.apply(calAstar).values
df.loc['z'] = groups.apply(calz).values

df.round(2)

## Autocorrelation

In [None]:
def _plot_correlogram(values, n_obs, title):
    """Draw one correlogram panel (bars plus line) on the current axes.

    Args:
        values: correlation coefficients indexed by lag, starting at lag 0.
        n_obs: number of observations in the underlying series; sets the
            +/- 1.96 / sqrt(n_obs) reference lines.
        title: panel title.
    """
    plt.bar(x=range(len(values)), height=values, alpha=0.3, color='green')
    plt.plot(values)
    plt.axhline(y=0, linestyle='--', color='blue')
    # Approximate 95% band for the sample correlations of white noise.
    bound = 1.96 / np.sqrt(n_obs)
    plt.axhline(y=-bound, linestyle='--', color='pink')
    plt.axhline(y=bound, linestyle='--', color='blue')
    plt.title(title)


def draw_acf(returns, title, nlags=20):
    """Plot the ACF and PACF of a series side by side and show the figure.

    Args:
        returns: the series to analyse (passed straight to statsmodels'
            ``acf`` and ``pacf``).
        title: prefix for both panel titles.
        nlags: highest lag to compute; defaults to 20, the value that was
            previously hard-coded.
    """
    lag_acf = acf(returns, nlags=nlags)
    lag_pacf = pacf(returns, nlags=nlags, method='ols')
    n_obs = len(returns)

    # Left panel: autocorrelation.
    plt.subplot(121)
    _plot_correlogram(lag_acf, n_obs, title + ' -- autocorrelation')

    # Right panel: partial autocorrelation.
    plt.subplot(122)
    _plot_correlogram(lag_pacf, n_obs, title + ' -- partial autocorrelation')
    plt.show()
    
# Correlograms for bitcoin returns and three transformations of them:
# absolute value, square, and log of the absolute deviation from the mean.
returns = returns_map['bitcoin']
demeaned = returns - returns.mean()

returns_abs = returns.abs()
returns_square = returns ** 2
returns_adjust = np.log(demeaned.abs())

for series, label in (
    (returns, 'returns'),
    (returns_abs, 'absolute returns'),
    (returns_square, 'square returns'),
    (returns_adjust, 'logarithms of absolute, mean-adjusted returns'),
):
    draw_acf(series, label)