In [1]:
import pandas as pd

names = ['AAA', 'BBB', 'CCC', 'DDD', 'EEE']

# Load the interpolated price series of the five cryptocurrencies
# ('AAA' ... 'EEE') into a dict of DataFrames keyed by coin name.
dfs = {
    name: pd.read_csv(r'/Volumes/MasterDrive/Quant Research/' + name + "_prices_interpolated.csv")
    for name in names
}

In [2]:
# build first- through fourth-order direction states (tuples of consecutive price-move signs) from the price data of each coin

def directional_indicator(row):
    """Map a price change to a discrete direction state.

    Returns 1 for a positive value, -1 for a negative value and 0 otherwise.
    NaN compares false against 0 in both tests, so it also yields 0.
    """
    return 1 if row > 0 else (-1 if row < 0 else 0)
    
for name in names:
    frame = dfs[name]
    frame['diff'] = frame['price'].diff()
    frame['first_order_states'] = frame['diff'].apply(directional_indicator)

    # Each higher order appends the next step's direction: shift the previous
    # indicator column up by one row, impute the missing tail value as
    # "no change" (0), and zip all indicator columns so far into a state tuple.
    history = [frame['first_order_states']]
    for indicator_col, states_col in (
        ('diff_indicator_second_order', 'second_order_states'),
        ('diff_indicator_third_order', 'third_order_states'),
        ('diff_indicator_fourth_order', 'fourth_order_states'),
    ):
        frame[indicator_col] = history[-1].shift(-1).fillna(0).astype('int64')
        history.append(frame[indicator_col])
        frame[states_col] = list(zip(*history))

In [3]:
# Preview only the first rows; displaying the whole frame bloats the saved notebook output.
dfs['AAA'].head(10)

Unnamed: 0,datetime,position,price,diff,first_order_states,diff_indicator_second_order,second_order_states,diff_indicator_third_order,third_order_states,diff_indicator_fourth_order,fourth_order_states
0,1970-01-01 00:00:00.000000001,0.0,1.10331,,0,-1,"(0, -1)",0,"(0, -1, 0)",-1,"(0, -1, 0, -1)"
1,1970-01-01 00:00:00.000000002,0.0,1.10320,-0.00011,-1,0,"(-1, 0)",-1,"(-1, 0, -1)",1,"(-1, 0, -1, 1)"
2,1970-01-01 00:00:00.000000003,0.0,1.10320,0.00000,0,-1,"(0, -1)",1,"(0, -1, 1)",0,"(0, -1, 1, 0)"
3,1970-01-01 00:00:00.000000004,0.0,1.10303,-0.00017,-1,1,"(-1, 1)",0,"(-1, 1, 0)",-1,"(-1, 1, 0, -1)"
4,1970-01-01 00:00:00.000000005,0.0,1.10389,0.00086,1,0,"(1, 0)",-1,"(1, 0, -1)",1,"(1, 0, -1, 1)"
5,1970-01-01 00:00:00.000000006,0.0,1.10389,0.00000,0,-1,"(0, -1)",1,"(0, -1, 1)",1,"(0, -1, 1, 1)"
6,1970-01-01 00:00:00.000000007,0.0,1.10372,-0.00017,-1,1,"(-1, 1)",1,"(-1, 1, 1)",0,"(-1, 1, 1, 0)"
7,1970-01-01 00:00:00.000000008,0.0,1.10375,0.00003,1,1,"(1, 1)",0,"(1, 1, 0)",-1,"(1, 1, 0, -1)"
8,1970-01-01 00:00:00.000000009,0.0,1.10376,0.00001,1,0,"(1, 0)",-1,"(1, 0, -1)",0,"(1, 0, -1, 0)"
9,1970-01-01 00:00:00.000000010,0.0,1.10376,0.00000,0,-1,"(0, -1)",0,"(0, -1, 0)",1,"(0, -1, 0, 1)"


In [4]:
# import numpy as np
# for name in names:
#     dfs[name]['second_order_states'].iloc[-1:] = np.nan
#     dfs[name]['third_order_states'].iloc[-2:] = np.nan
#     dfs[name]['fourth_order_states'].iloc[-3:] = np.nan

In [5]:
import itertools

states = [-1, 0, 1]

# R's verifymarkovproperty() cannot take tuples as input, so every tuple in the
# 3^n-element nth-order state space gets a unique integer code. Codes follow the
# enumeration order of itertools.product, starting at 0.
second_order_mapping = {
    combo: code for code, combo in enumerate(itertools.product(states, repeat=2))
}
third_order_mapping = {
    combo: code for code, combo in enumerate(itertools.product(states, repeat=3))
}
fourth_order_mapping = {
    combo: code for code, combo in enumerate(itertools.product(states, repeat=4))
}



In [6]:
# apply the integer mappings

def second_order_operation(row):
    """Encode a second-order state tuple as its integer code.

    Non-empty tuples are looked up in ``second_order_mapping`` (an unknown
    tuple still raises ``KeyError``). Anything else is returned unchanged:
    missing values such as ``None`` or NaN, and values that are already
    integer codes. A plain truthiness test is not enough here because NaN
    is truthy and would be used as a lookup key.
    """
    if isinstance(row, tuple) and row:
        return second_order_mapping[row]
    return row

def third_order_operation(row):
    """Encode a third-order state tuple as its integer code.

    Non-empty tuples are looked up in ``third_order_mapping`` (an unknown
    tuple still raises ``KeyError``). Anything else is returned unchanged:
    missing values such as ``None`` or NaN, and values that are already
    integer codes. A plain truthiness test is not enough here because NaN
    is truthy and would be used as a lookup key.
    """
    if isinstance(row, tuple) and row:
        return third_order_mapping[row]
    return row


def fourth_order_operation(row):
    """Encode a fourth-order state tuple as its integer code.

    Non-empty tuples are looked up in ``fourth_order_mapping`` (an unknown
    tuple still raises ``KeyError``). Anything else is returned unchanged:
    missing values such as ``None`` or NaN, and values that are already
    integer codes. A plain truthiness test is not enough here because NaN
    is truthy and would be used as a lookup key.
    """
    if isinstance(row, tuple) and row:
        return fourth_order_mapping[row]
    return row

for name in names:
    frame = dfs[name]

    # replace each state column by its encoded form, one order at a time
    for column, encode in (
        ('second_order_states', second_order_operation),
        ('third_order_states', third_order_operation),
        ('fourth_order_states', fourth_order_operation),
    ):
        frame[column] = frame[column].apply(encode)

    # prices one and two rows ahead of the current row
    next_price = frame['price'].shift(-1)
    frame['price_first_shift'] = next_price
    frame['price_second_shift'] = frame['price_first_shift'].shift(-1)

In [7]:
# for name in names:
#     dfs[name]['price_ewma'] = dfs[name]['price'].ewm(com=1).mean()

In [8]:
# Write the price, diff and integer-encoded state columns of every coin to a CSV file.
# The last two rows are dropped before writing (their price_second_shift is NaN).
# NOTE(review): the third-to-last row is kept, and its fourth-order state includes
# one value imputed as 0 by the earlier fillna — confirm this is acceptable.
export_columns = [
    'position',
    'price',
    'price_first_shift',
    'price_second_shift',
    'diff',
    'first_order_states',
    'second_order_states',
    'third_order_states',
    'fourth_order_states',
]
for name in names:
    trimmed = dfs[name][export_columns].iloc[:-2]
    trimmed.to_csv(r'/Volumes/MasterDrive/Quant Research/' + name + '_prices_interpolated_final.csv')