In [1]:
import pandas as pd

In [3]:
# render floats with a thousands separator and 2 decimal places
pd.set_option('display.float_format', '{:,.2f}'.format)

# raise the display limits: up to 200 rows and 100 columns are shown
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 100)

In [5]:
# code = data dictionary (for code GWA_BTC)
# date = the day the index values were calculated
# open = opening price index for Bitcoin in the US
# high = highest value index for bitcoin in us 
# low = lowest value index for bitcoin in the us
# close = closing price index for bitcoin in the us
# volume = volume of bitcoin traded that day
# VWAP = volume-weighted average price of bitcoin traded that day
# TWAP = time-weighted average price of bitcoin traded that day


# load the BNC2 sample dataset
# the column labels are supplied through `names`; header=None is what pandas
# infers whenever `names` is given, it is only spelled out here
df = pd.read_csv(
    'BNC2_sample.csv',
    header=None,
    names=['Code', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP', 'TWAP'],
)

# preview the first 5 observations
df.head()

Unnamed: 0,Code,Date,Open,High,Low,Close,Volume,VWAP,TWAP
0,GWA_BTC,2014-04-01,467.28,488.62,467.28,479.56,74776.48,482.76,482.82
1,GWA_BTC,2014-04-02,479.2,494.3,431.32,437.08,114052.96,460.19,465.93
2,GWA_BTC,2014-04-03,437.33,449.74,414.41,445.6,91415.08,432.29,433.28
3,GWA_BTC,2014-04-04,445.18,456.1,429.16,449.81,51147.27,443.46,443.93
4,GWA_BTC,2014-04-05,450.08,464.09,445.16,461.7,28449.19,452.53,452.95


In [7]:
# list every distinct value of the Code column
print(df['Code'].unique())

['GWA_BTC' 'GWA_ETH' 'GWA_LTC' 'GWA_XLM' 'GWA_XRP' 'MWA_BTC_CNY'
 'MWA_BTC_EUR' 'MWA_BTC_GBP' 'MWA_BTC_JPY' 'MWA_BTC_USD' 'MWA_ETH_CNY'
 'MWA_ETH_EUR' 'MWA_ETH_GBP' 'MWA_ETH_JPY' 'MWA_ETH_USD' 'MWA_LTC_CNY'
 'MWA_LTC_EUR' 'MWA_LTC_GBP' 'MWA_LTC_JPY' 'MWA_LTC_USD' 'MWA_XLM_CNY'
 'MWA_XLM_EUR' 'MWA_XLM_USD' 'MWA_XRP_CNY' 'MWA_XRP_EUR' 'MWA_XRP_GBP'
 'MWA_XRP_JPY' 'MWA_XRP_USD']


In [8]:
# MWA = market weighted average  -> regional prices
# GWA = global weighted average  -> globally indexed prices

# illustrate how GWA and MWA relate: the BTC rows for three codes on one day
df.loc[
    df['Code'].isin(['GWA_BTC', 'MWA_BTC_JPY', 'MWA_BTC_EUR'])
    & df['Date'].eq('2018-01-01')
]

Unnamed: 0,Code,Date,Open,High,Low,Close,Volume,VWAP,TWAP
1371,GWA_BTC,2018-01-01,14505.89,14505.89,13617.46,14092.74,225906.21,14103.18,14093.73
9074,MWA_BTC_EUR,2018-01-01,11859.35,11859.35,11111.07,11403.92,14933.73,11488.45,11478.08
11838,MWA_BTC_JPY,2018-01-01,1674341.45,1678567.55,1572173.9,1632657.51,68611.95,1632994.4,1631407.66


In [9]:
# row count before filtering down to the GWA codes
print('Before:', df.shape[0])

Before: 31761


In [10]:
# collect all the GWA (global weighted average) codes
# match on the 'GWA_' prefix rather than with a substring test, so a code that
# merely contains 'GWA_' somewhere after its start is not picked up by mistake
gwa_codes = [code for code in df['Code'].unique() if code.startswith('GWA_')]

In [11]:
# restrict the dataset to rows whose Code is one of the GWA codes
df = df.loc[df['Code'].isin(gwa_codes)]

In [12]:
# row count remaining after the GWA filter
print('After:', df.shape[0])

After: 6309


In [14]:
# reshape the dataset to wide form
# goal: returns over the prior 7, 14, 21 and 28 days; for that we keep a single
# price column (VWAP) laid out as one row per Date and one column per Code
pivoted_df = df.set_index(['Date', 'Code'])['VWAP'].unstack('Code')

# show the last rows of the pivoted dataset
pivoted_df.tail()

Code,GWA_BTC,GWA_ETH,GWA_LTC,GWA_XLM,GWA_XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-19,11826.36,1068.45,195.0,0.51,1.82
2018-01-20,13062.68,1158.71,207.58,0.52,1.75
2018-01-21,12326.23,1108.9,197.36,0.48,1.55
2018-01-22,11397.52,1038.21,184.92,0.47,1.43
2018-01-23,10921.0,992.05,176.95,0.47,1.42


In [16]:
# the prior-window returns (7, 14, 21, 28 days) are built on DataFrame.shift,
# which moves the values down by a number of rows while the index stays put

# last 3 rows as they are ...
print(pivoted_df.iloc[-3:])

# ... and the same 3 rows shifted down by one period (first row becomes NaN)
print(pivoted_df.iloc[-3:].shift(periods=1))

Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-22 11,397.52 1,038.21   184.92     0.47     1.43
2018-01-23 10,921.00   992.05   176.95     0.47     1.42
Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21       nan      nan      nan      nan      nan
2018-01-22 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-23 11,397.52 1,038.21   184.92     0.47     1.43


In [17]:
# return over the prior 7 rows = price_today / price_7_rows_ago - 1.0
delta_7 = pivoted_df.div(pivoted_df.shift(periods=7)).sub(1.0)

# show the last rows
delta_7.tail()

Code,GWA_BTC,GWA_ETH,GWA_LTC,GWA_XLM,GWA_XRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-19,-0.18,-0.17,-0.18,-0.21,-0.22
2018-01-20,-0.13,-0.19,-0.18,-0.23,-0.29
2018-01-21,-0.15,-0.2,-0.22,-0.22,-0.3
2018-01-22,-0.21,-0.24,-0.24,-0.25,-0.32
2018-01-23,-0.11,-0.12,-0.13,-0.02,-0.04


In [22]:
# build the prior-window returns for every window length in one pass,
# keyed as 'delta_<offset>'
delta_dict = {
    'delta_{}'.format(offset): pivoted_df.div(pivoted_df.shift(periods=offset)).sub(1.0)
    for offset in (7, 14, 21, 28)
}

In [23]:
# melting turns the wide returns table back into long form, which is the shape
# needed for the analytical base table (ABT); start with the delta_7 returns
melted_7 = pd.melt(delta_7.reset_index(), id_vars=['Date'], value_name='delta_7')

# show the last rows of the melted frame
melted_7.tail()

Unnamed: 0,Date,Code,delta_7
6965,2018-01-19,GWA_XRP,-0.22
6966,2018-01-20,GWA_XRP,-0.29
6967,2018-01-21,GWA_XRP,-0.3
6968,2018-01-22,GWA_XRP,-0.32
6969,2018-01-23,GWA_XRP,-0.04


In [24]:
# repeat the melt for every returns frame in delta_dict, naming each value
# column after its dictionary key, and collect the results in a list
melted_dfs = [
    pd.melt(delta_df.reset_index(), id_vars=['Date'], value_name=label)
    for label, delta_df in delta_dict.items()
]

In [25]:
# calculate 7-day returns after the date: price 7 rows ahead / price today - 1.0
return_df = pivoted_df.shift(-7) / pivoted_df - 1.0

# melt the return dataset into long form (Date, Code, return_7)
melted_return = return_df.reset_index().melt(id_vars=['Date'], value_name='return_7')

# keep this cell idempotent: drop any return_7 frame left by an earlier run
# before appending, otherwise re-running the cell stacks duplicates in
# melted_dfs and the later merge emits suffixed return_7_x / return_7_y columns
melted_dfs = [melted for melted in melted_dfs if 'return_7' not in melted.columns]
melted_dfs.append(melted_return)

In [26]:
# melted_dfs now holds 5 melted dataframes
# next step: join them into a single ABT

# start by merging just the first two on their shared Date/Code keys
melted_dfs[0].merge(melted_dfs[1], on=['Date', 'Code']).tail()

Unnamed: 0,Date,Code,delta_7,delta_14
6965,2018-01-19,GWA_XRP,-0.22,-0.41
6966,2018-01-20,GWA_XRP,-0.29,-0.42
6967,2018-01-21,GWA_XRP,-0.3,-0.51
6968,2018-01-22,GWA_XRP,-0.32,-0.52
6969,2018-01-23,GWA_XRP,-0.04,-0.48


In [27]:
# delta_7 and delta_14 are in the same row, this is the start of our ABT
# now need to merge all of our melted dataframes together 

#going to use reduce function in python

from functools import reduce

In [28]:
# feature_dfs = base features from the original dataset + every melted frame

# base features taken straight from the original dataset
base_df = df.loc[:, ['Date', 'Code', 'Volume', 'VWAP']]

# one list holding all the feature dataframes, base features first
feature_dfs = [base_df, *melted_dfs]

In [29]:
# reduce applies a two-argument function cumulatively across a sequence:
# here it folds the feature frames together, one merge at a time

def _merge_on_date_and_code(left, right):
    """Merge two feature frames on their shared Date and Code columns."""
    return pd.merge(left, right, on=['Date', 'Code'])

# reduce-merge all features into the analytical base table
abt = reduce(_merge_on_date_and_code, feature_dfs)

# show the last 10 rows of the ABT
abt.tail(10)

Unnamed: 0,Date,Code,Volume,VWAP,delta_7,delta_14,delta_21,delta_28,return_7
6299,2018-01-14,GWA_XRP,912107674.18,2.2,-0.31,0.02,1.06,1.97,-0.3
6300,2018-01-15,GWA_XRP,823491754.55,2.11,-0.29,-0.04,0.92,1.84,-0.32
6301,2018-01-16,GWA_XRP,3872977355.95,1.48,-0.46,-0.36,0.35,0.85,-0.04
6302,2018-01-17,GWA_XRP,5111390628.85,1.2,-0.47,-0.57,-0.06,0.58,
6303,2018-01-18,GWA_XRP,5156172462.44,1.68,-0.22,-0.51,0.23,0.59,
6304,2018-01-19,GWA_XRP,2126239927.56,1.82,-0.22,-0.41,0.02,0.65,
6305,2018-01-20,GWA_XRP,1346913296.52,1.75,-0.29,-0.42,-0.26,0.53,
6306,2018-01-21,GWA_XRP,1886060450.81,1.55,-0.3,-0.51,-0.28,0.45,
6307,2018-01-22,GWA_XRP,1784992299.63,1.43,-0.32,-0.52,-0.35,0.3,
6308,2018-01-23,GWA_XRP,2118335564.32,1.42,-0.04,-0.48,-0.39,0.29,


In [31]:
# date = the day on which the index values were calculated
# code = which cryptocurrency
# VWAP = Volume weighted average price traded that day
# delta_7 = return over the prior 7 days (1.0 =100% return)
# delta_14 = return over the prior 14 days (1.0 = 100% return)
# delta_21 = return over the prior 21 days (1.0 = 100% return)
# delta_28 = return over the prior 28 days (1.0 = 100% return)
# return_7 = future return over the next 7 days (1.0 = 100% return)

# notice how the last 7 observations don't have values for the return_7 feature. this is expected as we cannot calculate "future 7-days returns" for the last 7 days of the dataset

In [32]:
# the ABT already answers the original objective:
# to pick the coin with the biggest momentum on Sep 1, 2017, display the rows
# for that date and compare the 7, 14, 21 and 28 day deltas

# rows for Sep 1, 2017
abt.loc[abt['Date'].eq('2017-09-01')]

Unnamed: 0,Date,Code,Volume,VWAP,delta_7,delta_14,delta_21,delta_28,return_7
1249,2017-09-01,GWA_BTC,275034.79,4798.06,0.1,0.12,0.35,0.69,-0.09
2149,2017-09-01,GWA_ETH,2076778.42,387.55,0.17,0.28,0.29,0.72,-0.21
3543,2017-09-01,GWA_LTC,18553463.67,78.76,0.55,0.71,0.68,0.82,-0.1
4770,2017-09-01,GWA_XLM,372143342.95,0.02,0.19,0.34,0.05,0.1,-0.19
6164,2017-09-01,GWA_XRP,1138500431.07,0.25,0.15,0.57,0.39,0.44,-0.14
