In [21]:
# load packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [22]:
# read Low frequency data in 1 day
data_1d = pd.read_excel('./datasets/BTC-USDT-1d.xlsx').sort_values(by='time')
print(data_1d.head())

                  time    open   close    high     low       volume  \
1498  2020-01-24 00:00  8408.9  8437.9  8524.5  8242.0  1210.267018   
1497  2020-01-25 00:00  8438.0  8341.2  8446.6  8275.8   710.917276   
1496  2020-01-26 00:00  8341.2  8614.0  8619.6  8300.8   959.171707   
1495  2020-01-27 00:00  8614.0  8911.3  8998.8  8577.8  1521.439641   
1494  2020-01-28 00:00  8911.0  9375.0  9394.1  8888.8  1880.775733   

          turnover  
1498  1.015488e+07  
1497  5.929845e+06  
1496  8.133014e+06  
1495  1.335641e+07  
1494  1.705490e+07  


In [23]:
# read high frequency data in 1 hour
data_1h = pd.read_excel('./datasets/BTC-USDT-1h.xlsx').sort_values(by='time')
print(data_1h.head())

                   time    open   close    high     low     volume  \
35953  2020-01-23 23:00  8402.7  8409.0  8427.9  8356.8  60.275855   
35952  2020-01-24 00:00  8408.9  8408.8  8418.2  8389.9  46.796430   
35951  2020-01-24 01:00  8408.6  8360.0  8420.1  8359.9  40.631096   
35950  2020-01-24 02:00  8359.9  8295.8  8366.4  8276.4  97.234574   
35949  2020-01-24 03:00  8295.8  8321.4  8348.7  8285.0  72.503593   

            turnover  
35953  504968.371971  
35952  393189.050588  
35951  341180.443432  
35950  808689.430707  
35949  603016.797060  


In [24]:
# read high frequency data in 5 minutes
data_5m = pd.read_excel('./datasets/BTC-USDT-5m.xlsx').sort_values(by='time')
print(data_5m.head())

                   time    open   close    high     low     volume  \
74999  2020-02-15 19:10  9928.0  9928.7  9936.0  9927.5   4.950662   
74998  2020-02-15 19:15  9928.7  9924.2  9929.4  9921.5   2.594263   
74997  2020-02-15 19:20  9924.0  9908.9  9924.3  9906.8  10.040123   
74996  2020-02-15 19:25  9908.9  9921.3  9925.0  9908.9   3.585131   
74995  2020-02-15 19:30  9921.3  9916.4  9923.1  9911.9   3.916223   

           turnover  
74999  49172.961977  
74998  25752.203115  
74997  99548.948650  
74996  35558.220198  
74995  38839.672064  


In [25]:
# Power variation
def power_variation(X, p, delta_n, c, t):
    """
    Compute the power variation of a time series X
    :param X: time series
    :param p: power
    :param delta_n: number of division from t-1 to t
    :param c: constant
    :param t: time index
    :return: power variation
    """

    # Calculate the increment of X
    # x_t,i=X_t-1+i*delta_n-X_t-1+(i-1)*delta_n
    increments = np.diff(X)

    # Using delta_n to get the value of t
    time = int(t / delta_n)

    # Determine the cycle range
    n_intervals = int(1 / delta_n)

    # Calculate the power variation
    if p < 2:
        truncated_increments = increments[time-n_intervals:time][np.abs(increments[time:time+n_intervals]) <= c]
        power_variation = np.sum(np.power(truncated_increments, p))
        return power_variation
    
    elif p >= 2:
        power_variation = np.sum(np.power(np.abs(increments[time-n_intervals:time]), p))
        return power_variation

# Activity Signature Function (ASF)
def activity_signature_function(X, p, delta_n, c, t):
    """
    Compute the activity signature function of a time series X
    :param X: time series
    :param p: power
    :param delta_n: number of division from t-1 to t
    :param c: constant
    :param t: time index
    :return: activity signature function
    """

    # Check p, p is a positive number
    if p <= 0:
        raise ValueError("p must be a positive number")
    
    # Calculate activity signature function
    numerator = np.log(2) * p
    part_1 = np.log(2)
    part_2 = np.log(power_variation(X, p, delta_n, c, t))
    part_3 = np.log(power_variation(X, p, 2* delta_n, c, t))
    denominator = part_1 * part_2 * part_3
    activity_signature_function = numerator / denominator

    return activity_signature_function

In [26]:
# Use the features in X to calculate the log return 
data_1h['log_return'] = np.log(data_1h['close'] / data_1h['close'].shift(1))
data_1h = data_1h.dropna()

X = data_1h['log_return'].values
p = 2
delta_n = 1/24
c = 5
t = 1

ASF = activity_signature_function(X, p, delta_n, c, t)
print("Activity Signature Function: ", ASF)

estimation_of_c = ASF / 2
print("Estimation of c: ", estimation_of_c)

Activity Signature Function:  0.044684188690159034
Estimation of c:  0.022342094345079517


In [27]:
# Use the features in X to calculate the log return
data_5m['log_return'] = np.log(data_5m['close'] / data_5m['close'].shift(1))
data_5m = data_5m.dropna()

X = data_5m['log_return'].values
p = 2
delta_n = 1/288
c = 5
t = 1

ASF = activity_signature_function(X, p, delta_n, c, t)
print("Activity Signature Function: ", ASF)

estimation_of_c = ASF / 2
print("Estimation of c: ", estimation_of_c)

Activity Signature Function:  0.03926689098299126
Estimation of c:  0.01963344549149563
