### ------------------ Module 3: Compute Indicators ------------------

- What: Load algo results and compute indicator components
- When: 26 May 2022

$\color{red}{\text{Define Parameters:}}$

In [1]:
# Input: path of the results CSV that the load step reads with pd.read_csv.
FileName = '../OneDrive_1_5-6-2022/ethbusd_1min_Results.csv'

# Output: path the final frame (inputs plus computed columns) is written to with to_csv.
SaveFile = '../OneDrive_1_5-6-2022/ethbusd_1min_Results_Indicators.csv'

#### 1. Load libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

# Small progress helper: call it at the end of a cell to log that the cell finished.
def Keep_Track():
    """Print a completion message stamped with the current local date and time."""
    stamp = dt.datetime.now().strftime("%a %d %b @ %H:%M:%S")
    print(f"Executed successfully. {stamp}\n")

Keep_Track()


Executed successfully. Thu 26 May @ 16:00:44



#### 2. Load the data

In [3]:
print('* Loading data.')
Data = pd.read_csv(FileName)

# Report how many rows were read.
print('* Data loaded   :', len(Data),'records.')

print('* Create datetime field')

# Parse Start_Date (day-first), order the rows chronologically and renumber them.
# NOTE(review): reset_index() without drop=True keeps the previous row labels as an
# extra 'index' column, which then travels with Data into later cells - confirm intended.
Data = (
    Data
    .assign(Start_Date=lambda frame: pd.to_datetime(frame['Start_Date'], dayfirst=True))
    .sort_values(by=['Start_Date'])
    .reset_index()
)

print('* First Date    :', Data['Start_Date'].min())
print('* Last trade    :', Data['Start_Date'].max())

# Index by timestamp: the time-based rolling windows used later need a datetime index.
Data = Data.set_index('Start_Date')

print('')
Keep_Track()

* Loading data.
* Data loaded   : 57590 records.
* Create datetime field
* First Date    : 2022-04-01 00:00:00
* Last trade    : 2022-05-10 23:59:00

Executed successfully. Thu 26 May @ 16:00:59



Define a function to compute some useful pre-indicators

In [4]:
# Flag each record's high-low spread against a ladder of percentage thresholds and
# return the names of the flag columns for later use.
def Vol_Thresholds(Data, Thresholds = np.array([0,1,2,4])):
    """Add high-low spread flag columns to Data.

    A column 'HL_Spread' = (high_price - low_price) / high_price is written to Data,
    followed by one 0/1 integer column per flag:
      * '<t>%<x'        : spread strictly above t%, for every threshold after the first
      * '<a>%<x<=<b>%'  : spread strictly above a% and at most b%, for each
                          consecutive pair (a, b) of thresholds

    Parameters
    ----------
    Data : DataFrame with 'high_price' and 'low_price' columns. It is modified in place.
    Thresholds : sequence of percentage levels.

    Returns
    -------
    (Data, Name_List) : the same frame and a NumPy array holding the flag column
    names in creation order (all '<t>%<x' names first, then the bands).
    """
    spread = (Data['high_price'] - Data['low_price']) / Data['high_price']
    Data['HL_Spread'] = spread

    # Collect (label, mask) pairs first; open-ended flags come before the bands.
    flags = [(str(lower)+'%<x', spread > lower/100) for lower in Thresholds[1:]]
    flags += [
        (str(lower)+'%<x<='+str(upper)+'%', (spread > lower/100) & (spread <= upper/100))
        for lower, upper in zip(Thresholds[:-1], Thresholds[1:])
    ]

    for label, mask in flags:
        print('*',label)
        Data[label] = np.asarray(mask, dtype=int)

    # Names are gathered so the columns can be summed automatically later.
    Name_List = np.append(np.empty(0), [label for label, _ in flags])

    return Data, Name_List

# Log that the function definition in this cell executed.
print('* Vol pre-indicator function built.')
print('')
Keep_Track()

* Vol pre-indicator function built.

Executed successfully. Thu 26 May @ 16:01:05



Calculate the vol range fields

In [5]:
# Add the spread flag columns to Data and keep their names; the rolling-sum loop
# in the next section iterates over List_Names.
Data, List_Names = Vol_Thresholds(Data)
print('')
Keep_Track()

* 1%<x
* 2%<x
* 4%<x
* 0%<x<=1%
* 1%<x<=2%
* 2%<x<=4%

Executed successfully. Thu 26 May @ 16:01:11



#### 2. Compute rolling measures

In [6]:
# Rolling indicators over time-based windows. These need Data to carry the sorted
# Start_Date datetime index set in the load step.
# Each aggregate is computed only on the column(s) it needs: rolling over the whole
# frame and then discarding all but one column repeats the work for every column and
# breaks as soon as Data holds a non-numeric column.
# NOTE: this cell appends columns to Data, so running it twice duplicates them.

# Compute rolling 24hr volume.
print('* Compute 24hr volume')
Vol_24 = Data[['trade_amt']].rolling('86400s').sum().rename(columns = {'trade_amt':'Vol_24h'})

# Compute 15min High and Low
print('* Compute 15m high/low price')
Max_15 = Data[['high_price']].rolling('900s').max().rename(columns = {'high_price':'High_15m'})
Min_15 = Data[['low_price']].rolling('900s').min().rename(columns = {'low_price':'Low_15m'})

# Compute 24h high
print('* Compute 24hr high price')
High_24 = Data[['high_price']].rolling('86400s').max().rename(columns = {'high_price':'High_24'})

# Rolling 6 hour sums of every flag column, done in one pass and suffixed '_6H'.
print('* Compute vol indicators')
Vol_6H = Data[list(List_Names)].rolling('21600s').sum().add_suffix('_6H')

# Single concat; column order matches the original two-step build.
Data = pd.concat([Data, Max_15, Min_15, High_24, Vol_24, Vol_6H], axis=1)

print('')
Keep_Track()

* Compute 24hr volume
* Compute 15m high/low price
* Compute 24hr high price
* Compute vol indicators

Executed successfully. Thu 26 May @ 16:01:26



#### 3. Compute some data target measures

In [7]:
# Input_Data: The underlying trade data to calculate the target variables for.
# Minutes   : The number of minutes to look ahead.

def Add_Targets(Input_Data, Minutes, Names = []):
    """Build forward-looking target columns from close_price.

    For a row at time t the targets describe the closed window [t, t + Minutes]:
      * Y_Return_<window>      : close at the end of the window / close at t
      * Y_Return_high_<window> : highest close after t / close at t
      * Y_Return_low_<window>  : lowest close after t / close at t
      * Y_DrawDown_<window>    : largest percentage drop below the running peak,
                                 where the peak never counts as lower than the close at t
    where <window> is the window length in seconds followed by 's' (e.g. '3600s').

    Targets are aligned by timestamp, not by row count: a row only receives values
    when a record exists exactly Minutes after it, otherwise its targets are NaN.
    Shifting by a fixed number of rows would silently attach the wrong window to
    every row that has a missing bar inside its look-ahead period.

    Parameters
    ----------
    Input_Data : DataFrame with a sorted datetime index and a 'close_price' column.
    Minutes : look-ahead horizon in minutes.
    Names : accepted for backward compatibility; not used.

    Returns
    -------
    DataFrame indexed like Input_Data with 'close_price' followed by the four
    target columns.
    """

    # Convert to seconds for the rolling window specification.
    Seconds = Minutes * 60

    # Window string for the time-based rolling function (also used in column names).
    window = str(Seconds)+'s'

    # Time offset used to move each window's result back to the window's start.
    horizon = pd.Timedelta(seconds=Seconds)

    print('* Calculating metrics over', window)

    # The callbacks receive each window as a NumPy array (raw=True), so x[0] and
    # x[-1] are plain positional lookups.

    # Values after the first observation, ignoring missing prices.
    def _after_start(x):
        later = x[1:]
        return later[~np.isnan(later)]

    # Return over the window (End/Start).
    def _period_return(x):
        return x[-1] / x[0]

    # Max return over the window (the starting observation itself is not used).
    def _high_return(x):
        later = _after_start(x)
        return later.max() / x[0] if later.size else np.nan

    # Min return over the window (the starting observation itself is not used).
    def _low_return(x):
        later = _after_start(x)
        return later.min() / x[0] if later.size else np.nan

    # Copy of the DD calculation used in Module 2
    def _max_drawdown(x):
        # A single observation has no return series to draw down from.
        if len(x) < 2:
            return np.nan

        minute_ret = pd.Series(x).pct_change()
        cum_ret = (1 + minute_ret).cumprod()

        # Running maximum of the cumulative return, never allowed below 1
        # (i.e. the starting value always counts as a peak).
        running_max = cum_ret.dropna().cummax().clip(lower=1)

        # How far below the running max we are at each point.
        drawdown = cum_ret / running_max - 1

        # The largest drop from a peak, in percent.
        return drawdown.min() * 100

    close = Input_Data[['close_price']]
    roller = close.rolling(window, closed = 'both')

    # Evaluate func on every trailing window, then relabel each result with the
    # window's start time so the value becomes forward looking for that row.
    def _forward_target(func, column):
        target = roller.apply(func, raw=True).rename(columns = {'close_price': column})
        target.index = target.index - horizon
        # Repeated timestamps: keep the last, which saw every row at that time.
        target = target[~target.index.duplicated(keep='last')]
        # Rows with no record exactly one horizon ahead become NaN.
        return target.reindex(close.index)

    # Run calculation
    Period_Return = _forward_target(_period_return, 'Y_Return_'+window)
    Return_High = _forward_target(_high_return, 'Y_Return_high_'+window)
    Return_Low = _forward_target(_low_return, 'Y_Return_low_'+window)
    Draw_Down = _forward_target(_max_drawdown, 'Y_DrawDown_'+window)

    return pd.concat([close, Period_Return, Return_High, Return_Low, Draw_Down],  axis=1)

# Log that the function definition in this cell executed.
print('')
Keep_Track()


Executed successfully. Thu 26 May @ 16:02:27



In [8]:
# Compute the 60 minute forward targets (column names carry the window in seconds).
Temp = Add_Targets(Data, 60)
print('* Creating master.')

# Append the target columns, then turn Start_Date back into an ordinary column.
Data = pd.concat(
    [
        Data,
        Temp[[
            'Y_Return_3600s',
            'Y_Return_high_3600s',
            'Y_Return_low_3600s',
            'Y_DrawDown_3600s',
        ]],
    ],
    axis = 1,
).reset_index()

print('')
Keep_Track()

* Calculating metrics over 3600s
* Creating master.

Executed successfully. Thu 26 May @ 16:05:02



In [None]:
# Data

#### 4. Save

In [None]:
# Write the master frame to SaveFile; index=False keeps the row index out of the CSV.
print('* Saving file to', SaveFile)
Data.to_csv(SaveFile, index=False)

print('')
Keep_Track()

Test: reload the saved file to check that it can be read back.

In [None]:
# Read the saved CSV back in. Note that this rebinds Data to the reloaded frame.
print('* Loading data.')
Data = pd.read_csv(SaveFile)
print('')
Keep_Track()

In [None]:
# Data.info()