In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import os
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
import statsmodels.api as stm
import statsmodels
import arch.data.frenchdata
import matplotlib.pyplot as plt
from arch.unitroot import VarianceRatio
from hurst import compute_Hc, random_walk
import warnings
warnings.filterwarnings("ignore")

In [None]:
%load_ext rpy2.ipython
%R install.packages("randtests")
%R library(randtests)

## Tests used for testing the Efficient Market Hypothesis (EMH)

In [3]:
def lung_box(returns):
    """Ljung-Box autocorrelation test on ``returns`` at lag 10.

    Parameters
    ----------
    returns : array-like
        Series passed straight to ``acorr_ljungbox``.

    Returns
    -------
    tuple
        ``(statistic, p_value)``, so callers can index ``[1]`` for the p-value
        whichever statsmodels version is installed.
    """
    result = stm.stats.diagnostic.acorr_ljungbox(returns, lags=[10], boxpierce=False)
    # Newer statsmodels releases return a DataFrame (columns 'lb_stat' and
    # 'lb_pvalue') instead of a tuple; convert it back so the positional
    # indexing used by callers keeps working.
    if isinstance(result, pd.DataFrame):
        return result['lb_stat'].to_numpy(), result['lb_pvalue'].to_numpy()
    return result

def runs_test(returns):
    """Apply the one-sample runs test to ``returns``.

    The input is converted to a NumPy array first; the sample is split at its
    median and the continuity correction is enabled. The result of
    ``runstest_1samp`` is returned unchanged.
    """
    values = pd.Series.to_numpy(returns)
    one_sample_runs = statsmodels.sandbox.stats.runs.runstest_1samp
    return one_sample_runs(values, cutoff='median', correction=True)

def hurst_exponent(returns):
    """Estimate the Hurst exponent of ``returns`` via ``compute_Hc``.

    The result of ``compute_Hc`` is returned unchanged.

    NOTE(review): ``kind='random_walk'`` presumably tells ``hurst`` that the
    input is the level of a random walk rather than its increments, while the
    parameter name suggests returns are passed in. Confirm against the hurst
    package documentation whether ``kind='change'`` was intended.
    """
    hurst_result = compute_Hc(returns, kind='random_walk', simplified=True)
    return hurst_result

def bds(returns):
    """Run the BDS independence test on ``returns``.

    Uses a maximum embedding dimension of 2, lets statsmodels choose epsilon
    from ``distance=1.5``, and returns the statsmodels result unchanged.
    """
    bds_test = statsmodels.tsa.stattools.bds
    return bds_test(returns, max_dim=2, epsilon=None, distance=1.5)

## P-value calculation using a rolling window

In [4]:
def RollingWindows(all_returns, rollingWindow):
  df = pd.DataFrame()
  for i in range (rollingWindow,len(all_returns)):
      returns = all_returns[i-rollingWindow:i]
      pvalue = []
      pvalue.append((lung_box(returns)[1]).item(0))
      pvalue.append(runs_test(returns)[1])
      pvalue.append(VarianceRatio(returns, 100).pvalue)
      pvalue.append((bds(returns)[1]).item(0))
      pvalue.append(hurst_exponent(returns)[0])
      %Rpush returns
      %R bartels = bartels.rank.test(returns, alternative= 'two.sided', pvalue='normal')
      bartels = %Rget bartels
      bartelsValue = bartels[5]
      pvalue.append(bartelsValue[0])
      x = pd.Series(pvalue)
      df = df.append(x,ignore_index=True)
  return df

## Crypto efficiency testing function

In [5]:
def CryptoEfficiency2(df,rollingWindow_size,excelName):
    """Compute rolling efficiency-test results for every sheet and save them to Excel.

    Parameters
    ----------
    df : pandas.ExcelFile
        Workbook whose sheets each contain a ``'Close'`` column.
    rollingWindow_size : int
        Window length forwarded to ``RollingWindows``.
    excelName : str
        Path of the workbook to write (xlsxwriter engine).

    For each sheet, log returns are computed from ``'Close'``, the rolling tests
    are run, and the result is written to a sheet of the same name.
    """
    sheet_names = df.sheet_names
    total = len(sheet_names)
    # The context manager saves and closes the workbook even if a sheet fails;
    # ExcelWriter.save() no longer exists in pandas 2.0+.
    with pd.ExcelWriter(excelName, engine='xlsxwriter') as writer:
        for position, sheet_name in enumerate(sheet_names):
            df_sheet = df.parse(position)
            # Log returns; dropna removes the leading NaN produced by diff().
            all_returns = np.log(df_sheet['Close']).diff().dropna()
            dff = RollingWindows(all_returns, rollingWindow_size)
            dff = dff.rename(columns={0: 'Ljung-Box', 1: 'RunsTest', 2: 'VarianceRatio',
                                      3: 'BDSTest', 4: 'HurstExponent', 5: 'BartelsTest'})
            dff.to_excel(writer, sheet_name=str(sheet_name), index=False)
            print(sheet_name,' processed ', position+1,'/',total)
    print('Saved as: ',excelName)

In [None]:
def RollingWindows2(all_returns, rollingWindow):
    """Alias of ``RollingWindows``, kept so existing callers keep working.

    This function used to be a line-for-line copy of ``RollingWindows``;
    delegating keeps the two from drifting apart.

    Parameters
    ----------
    all_returns : sliceable sequence
        Return series to test.
    rollingWindow : int
        Number of observations per window.

    Returns
    -------
    pandas.DataFrame
        Whatever ``RollingWindows`` returns for the same arguments.
    """
    return RollingWindows(all_returns, rollingWindow)

In [None]:
def CryptoEfficiency3(df,rollingWindow_size,excelName):
    """Compute rolling efficiency-test results from precomputed returns and save to Excel.

    Parameters
    ----------
    df : pandas.ExcelFile
        Workbook whose sheets each contain a ``'Returns'`` column.
    rollingWindow_size : int
        Window length forwarded to ``RollingWindows2``.
    excelName : str
        Path of the workbook to write (xlsxwriter engine).

    Unlike ``CryptoEfficiency2``, the ``'Returns'`` column is used as-is (no log
    differencing and no NaN removal).
    """
    sheet_names = df.sheet_names
    total = len(sheet_names)
    # The context manager saves and closes the workbook even if a sheet fails;
    # ExcelWriter.save() no longer exists in pandas 2.0+.
    with pd.ExcelWriter(excelName, engine='xlsxwriter') as writer:
        for position, sheet_name in enumerate(sheet_names):
            df_sheet = df.parse(position)
            all_returns = df_sheet['Returns']
            dff = RollingWindows2(all_returns, rollingWindow_size)
            dff = dff.rename(columns={0: 'Ljung-Box', 1: 'RunsTest', 2: 'VarianceRatio',
                                      3: 'BDSTest', 4: 'HurstExponent', 5: 'BartelsTest'})
            dff.to_excel(writer, sheet_name=str(sheet_name), index=False)
            print(sheet_name,' processed ', position+1,'/',total)
    print('Saved as: ',excelName)

In [16]:
# Open the two input workbooks; individual sheets are read later via .parse().
# NOTE(review): absolute, machine-specific paths - consider a configurable data directory.
all_data= pd.ExcelFile('/Users/Catalina/Desktop/crypto_jumps/DateCryptoD.xlsx')
no_jumps= pd.ExcelFile('/Users/Catalina/Desktop/crypto_jumps/nojumps.xlsx')

In [None]:
# Rolling tests with a 250-observation window on every sheet of all_data.
# NOTE(review): the output name has no extension and is relative to the working
# directory, while a later cell reads '.../crypto_jumps/Data_All_pval.xlsx' -
# presumably the file is renamed/moved by hand; confirm.
CryptoEfficiency2(all_data, 250, 'Data_All_pval')

In [20]:
# Same rolling tests on the no_jumps workbook, using its 'Returns' column.
# NOTE(review): the output name has no extension, while a later cell reads
# '.../crypto_jumps/nojumps_pval.xlsx' - presumably renamed/moved by hand; confirm.
CryptoEfficiency3(no_jumps, 250, 'nojumps_pval')

no_jumps_bitfinex  processed  1 / 2
no_jumps_coinbase  processed  2 / 2
Saved as:  nojumps_pval


## Percentiles

In [143]:
def Percentile2(df, q, excelName):
    """Write the requested quantiles of every numeric column, sheet by sheet, to Excel.

    Parameters
    ----------
    df : pandas.ExcelFile
        Workbook to summarise; each sheet is parsed into a DataFrame.
    q : list-like of float
        Quantile levels in [0, 1]. Must be list-like: the result is handled as a
        DataFrame with one row per level.
    excelName : str
        Path of the workbook to write (xlsxwriter engine).

    Each output sheet has a leading ``'Percentiles'`` column holding ``q``,
    followed by the linearly interpolated quantiles of the numeric columns.
    """
    sheet_names = df.sheet_names
    total = len(sheet_names)
    # The context manager saves and closes the workbook even if a sheet fails;
    # ExcelWriter.save() no longer exists in pandas 2.0+.
    with pd.ExcelWriter(excelName, engine='xlsxwriter') as writer:
        for position, sheet_name in enumerate(sheet_names):
            df_sheet = df.parse(position)
            percentile = df_sheet.quantile(q, axis=0, numeric_only=True, interpolation='linear')
            # The quantile levels live in the index, which is not written out
            # (index=False), so copy them into a regular first column.
            percentile.insert(0, 'Percentiles', q)
            percentile.to_excel(writer, sheet_name=str(sheet_name), index=False)
            print(sheet_name,' processed ', position+1,'/',total)
    print('Saved as: ',excelName)

In [136]:
# Open the rolling-test result workbooks for the percentile step.
# NOTE(review): absolute, machine-specific paths; the earlier cells save these
# files as 'Data_All_pval' / 'nojumps_pval' without the .xlsx extension - confirm
# how they end up at these locations.
pval_all= pd.ExcelFile('/Users/Catalina/Desktop/crypto_jumps/Data_All_pval.xlsx')
pval_nojumps= pd.ExcelFile('/Users/Catalina/Desktop/crypto_jumps/nojumps_pval.xlsx')

In [None]:
# Quantiles of every test column for the full data set (q = 1 is the column maximum).
Percentile2(pval_all, [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1], 'percentile_all')

In [142]:
# Same quantile levels for the results computed on the no-jumps data.
Percentile2(pval_nojumps, [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1], 'percentile_no_jumps')

no_jumps_bitfinex  processed  1 / 2
no_jumps_coinbase  processed  2 / 2
Saved as:  percentile_no_jumps


## Distribution Comparison


In [2]:
# Workbook used by the distribution-comparison functions below, which read the
# 'All Data' and 'W/out Jumps' columns of each sheet.
# NOTE(review): absolute, machine-specific path; no cell in this notebook writes
# 'quantile_compa.xlsx' - presumably assembled by hand from the percentile outputs; confirm.
percentiles= pd.ExcelFile('/Users/Catalina/Desktop/crypto_jumps/quantile_compa.xlsx')

In [11]:
# `percentiles` is a pd.ExcelFile, which has no .head(); parse the first sheet
# into a DataFrame before previewing it.
percentiles.parse(0).head()

Unnamed: 0,All Data,W/out Jumps
0,0.000434,3.6e-05
1,0.000676,0.000142
2,0.004388,0.000345
3,0.012633,0.004173
4,0.029045,0.015717


### Kolmogorov-Smirnov

In [43]:
def KSComparasion(df,alt):
    """Print a two-sample Kolmogorov-Smirnov comparison for every sheet of ``df``.

    For each sheet the ``'All Data'`` and ``'W/out Jumps'`` columns are compared
    with ``scipy.stats.ks_2samp`` using the alternative ``alt``. One list is
    printed per sheet: ``[sheet name, statistic rounded to 2 decimals, p-value]``.
    Nothing is returned.
    """
    for sheet_name in df.sheet_names:
        sheet = df.parse(sheet_name)
        result = stats.ks_2samp(
            sheet['All Data'],
            sheet['W/out Jumps'],
            alternative=alt,
            mode='auto',
        )
        print([sheet_name, round(result.statistic, 2), result.pvalue])
    

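Null hypotheses for each `alternative` ($F$ and $G$ are the CDFs underlying the first and second sample):\
`two-sided` — $H_0: F(x) = G(x)$ for all $x$\
`less` — $H_0: F(x) \geq G(x)$ for all $x$\
`greater` — $H_0: F(x) \leq G(x)$ for all $x$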

In [48]:
# Two-sided KS comparison of 'All Data' vs 'W/out Jumps' for every sheet.
KSComparasion(percentiles, 'two-sided')

['LjungBox_BTC_Bitfinex', 0.25, 0.8689816711757754]
['LjungBox_BTC_Coinbase', 0.25, 0.8689816711757754]
['Runs_BTC_Bitfinex', 0.08, 1.0]
['Runs_BTC_Coinbase', 0.08, 1.0]
['VarianceRatio_BTC_Bitfinex', 0.17, 0.9984852944874484]
['VarianceRatio_BTC_Coinbase', 0.17, 0.9984852944874484]
['BDS_BTC_Bitfinex', 0.17, 0.9984852944874484]
['BDS_BTC_Coinbase', 0.17, 0.9984852944874484]
['Hurst_BTC_Bitfinex', 0.33, 0.5360977695073805]
['Hurst_BTC_Coinbase', 0.25, 0.8689816711757754]
['Bartels_BTC_Bitfinex', 0.08, 1.0]
['Bartels_BTC_Coinbase', 0.08, 1.0]


### Cramér–von Mises

Two-sided test; $H_0$: both samples are drawn from the same distribution.

In [3]:
def CMComparasion(df):
    """Print a two-sample Cramér-von Mises comparison for every sheet of ``df``.

    For each sheet the ``'All Data'`` and ``'W/out Jumps'`` columns are compared
    with ``scipy.stats.cramervonmises_2samp``. One list is printed per sheet:
    ``[sheet name, statistic rounded to 2 decimals, p-value]``.
    Nothing is returned.
    """
    for sheet_name in df.sheet_names:
        sheet = df.parse(sheet_name)
        result = stats.cramervonmises_2samp(sheet['All Data'], sheet['W/out Jumps'])
        print([sheet_name, round(result.statistic, 2), result.pvalue])

In [4]:
# Cramér-von Mises comparison of 'All Data' vs 'W/out Jumps' for every sheet.
CMComparasion(percentiles)

['LjungBox_BTC_Bitfinex', 0.09, 0.6838759125006051]
['LjungBox_BTC_Coinbase', 0.06, 0.8581100527218358]
['Runs_BTC_Bitfinex', 0.02, 0.9999999999791294]
['Runs_BTC_Coinbase', 0.01, 1.0]
['VarianceRatio_BTC_Bitfinex', 0.03, 0.9905271476587646]
['VarianceRatio_BTC_Coinbase', 0.03, 0.9988042870979117]
['BDS_BTC_Bitfinex', 0.03, 0.9905271476587646]
['BDS_BTC_Coinbase', 0.03, 0.9905271476587646]
['Hurst_BTC_Bitfinex', 0.26, 0.18589323337606456]
['Hurst_BTC_Coinbase', 0.19, 0.2920032106215408]
['Bartels_BTC_Bitfinex', 0.02, 0.999991368054848]
['Bartels_BTC_Coinbase', 0.02, 0.999991368054848]
