### ------------------ Module 4a: Analysis ------------------

- What: Generate non-overlapping data
- When: 17 May 2022    

$\color{red}{\text{Define Parameters:}}$

In [1]:
# Relative path of the CSV holding the results of a previous run.
# It is the file handed to Load_Run(...) in the analysis section.
FileName = '../OneDrive_1_5-6-2022/ethbusd_1min_Results_Indicators.csv'

# Location to save results to.
# NOTE(review): left empty and not referenced in the visible part of this notebook - confirm it is still needed.
SaveFile = ''


#### 1. Load some libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt

# Small progress marker used at the end of each cell.
def Keep_Track():
    """Print a confirmation line stamped with the current local time, followed by a blank line."""
    Time_Stamp = dt.datetime.now().strftime("%a %d %b @ %H:%M:%S")
    Message = "Executed successfully. " + Time_Stamp + "\n"
    print(Message)

Keep_Track()

Executed successfully. Wed 18 May @ 12:35:16



#### 2. Load some data.

2.1 Create a function to load the dataset and perform some simple calculations

In [3]:
# This function loads the results of a previous run.
def Load_Run(File_Name):
    """Load a results CSV, parse its date columns and add the 'HL15' column.

    Parameters
    ----------
    File_Name : path or file-like object
        Anything accepted by pd.read_csv. The file must contain the columns
        'start_timestamp', 'Start_Date', 'End_Date', 'High_15m' and 'Low_15m'.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, with the three date columns converted to timestamps
        (day-first parsing), 'HL15' = (High_15m - Low_15m) / High_15m added and
        any 'level_0' column removed.  None is returned when loading fails; the
        reason is printed so the failure can be diagnosed.
    """
    print('* Loading', File_Name)

    try:
        # Load the transaction dataset.
        Data = pd.read_csv(File_Name)

        # Convert the times to timestamps - the format is specified as day first to avoid parsing errors.
        for Date_Column in ('start_timestamp', 'End_Date', 'Start_Date'):
            Data[Date_Column] = pd.to_datetime(Data[Date_Column], dayfirst=True)

        # High-low range of the 15 minute window, relative to the high.
        Data['HL15'] = (Data['High_15m'] - Data['Low_15m']) / Data['High_15m']

        # Provide a quick summary of how many records we have
        print('* Data loaded   :', len(Data),'records.')
        print('* First Date    :', Data['start_timestamp'].min())
        print('* Last trade    :', Data['start_timestamp'].max())

        # Drop the level_0 column if it exists (errors='ignore' makes this a no-op otherwise).
        Data = Data.drop(columns='level_0', errors='ignore')

        print('* ---------- Load complete.')

        return Data

    except Exception as Load_Error:
        # Keep the original best-effort contract (report and return None), but say what went
        # wrong, and do not intercept KeyboardInterrupt / SystemExit as a bare except would.
        print('* ---------- Load FAILED!')
        print('* Reason        :', type(Load_Error).__name__, '-', Load_Error)
        return None

# Announce that the definition above ran, leave one blank line, then time-stamp the cell.
print('* Load result function defined.', end='\n\n')
Keep_Track()

* Load result function defined.

Executed successfully. Wed 18 May @ 12:35:19



#### 3. Analysis

In [4]:
# Load results from file
Out_All = Load_Run(FileName)

# Load_Run signals a failed load by returning None instead of raising.
# Stop here with a clear message rather than let a later cell fail on a None value.
if Out_All is None:
    raise RuntimeError('Could not load results from ' + str(FileName))

print('')
Keep_Track()

* Loading ../OneDrive_1_5-6-2022/ethbusd_1min_Results_Indicators.csv
* Data loaded   : 57542 records.
* First Date    : 2022-04-01 00:00:00
* Last trade    : 2022-05-10 23:21:00
* ---------- Load complete.

Executed successfully. Wed 18 May @ 12:35:27



In [5]:
# Example: headline fields of the record(s) whose run started at one particular minute
Start_Date = '2022-04-01 00:07:00'
Out_All.loc[
    Out_All['Start_Date'] == Start_Date,
    ['Start_Date','End_Date','Life(s)','Initial_Open','Final_Close', 'Level', 'Capital','Cap_Return%'],
]

Unnamed: 0,Start_Date,End_Date,Life(s),Initial_Open,Final_Close,Level,Capital,Cap_Return%
7,2022-04-01 00:07:00,2022-04-01 02:07:00,7200.0,3288.0,3254.93,2,4.44,-0.021182


Generate a path of non-overlapping runs

In [6]:
# Create a subset of records that are of interest.
# For example: 24h volume of at least Vol million, and (required) a non-zero life.
Vol = 390
Indicator = (Out_All['Vol_24h'] >= Vol*1000000) & (Out_All['Life(s)'] > 0)

# The columns carried along each path.  Run_Start and Run_End begin as the same
# subset; they are only ordered differently further down.
Path_Columns = ['Start_Date','End_Date','Level','Life(s)','Cap_Return%','Sharpe','Max_Drawdown']
Run_Start = Out_All.loc[Indicator, Path_Columns].copy()
Run_End = Run_Start.copy()

# Without any selected record there is no latest start date to build the terminating record from.
if Run_Start.empty:
    raise ValueError('No records satisfy the indicator (Vol = ' + str(Vol) + '); cannot build paths.')

# Add in a distant date to terminate the process later:
# copy the record(s) with the latest start date and move them 20 years ahead.
End_Record = Run_Start[Run_Start['Start_Date'] == Run_Start['Start_Date'].max()].copy()
End_Record['Start_Date'] = End_Record['Start_Date'] + pd.offsets.DateOffset(years=20)
# Start and end coincide, so the forward join in Build_Paths keeps matching this record to itself.
End_Record['End_Date'] = End_Record['Start_Date']
# For this new date we have no results of interest.  Set to NaN so they can be excluded
for Result_Column in ['Cap_Return%', 'Level', 'Life(s)', 'Sharpe', 'Max_Drawdown']:
    End_Record[Result_Column] = np.nan

Run_Start = pd.concat([Run_Start, End_Record])
Run_End = pd.concat([Run_End, End_Record])

# merge_asof needs both join keys sorted: Run_Start is joined on its end date,
# Run_End on its start date.
Run_Start = Run_Start.sort_values(by=['End_Date'])
Run_End = Run_End.sort_values(by=['Start_Date'])

print('')
Keep_Track()


Executed successfully. Wed 18 May @ 12:38:19



Build the paths. Each algo runs until it stops; we then wait for the next allowed start date (for example, the next date on which the indicator is true) and run it again, repeating until no further runs are available.

In [7]:
def Build_Paths(Run_Start, Run_End, Max_Steps=400, Terminal_Date=None):
    """Chain every run to the runs that follow it, one non-overlapping path per row.

    Step i joins each row's most recent end date ('End_Date' for step 0,
    'End_Date_<i-1>' afterwards) to the first row of Run_End whose 'Start_Date'
    is at or after it (pd.merge_asof with direction='forward').  The matched
    run's columns are appended with the suffix '_<i>', and the frame is
    re-sorted by the new 'End_Date_<i>' so it is ready for the next join.

    Parameters
    ----------
    Run_Start : pandas.DataFrame
        First run of every path; needs 'Start_Date' and 'End_Date' columns and
        must be sorted by 'End_Date'.
    Run_End : pandas.DataFrame
        Candidate follow-on runs with the same columns, sorted by 'Start_Date'.
        It is expected to end with a terminating record whose start and end
        dates coincide, so that finished paths keep matching that record.
    Max_Steps : int, default 400
        Upper bound on the number of joins (i.e. on the path length).
    Terminal_Date : timestamp, optional
        Start date of the terminating record.  Defaults to the latest
        'Start_Date' in Run_End, so no notebook-level variable is needed.

    Returns
    -------
    pandas.DataFrame
        Run_Start widened by one set of suffixed columns per step.
    """
    print('* Running ...')

    if Terminal_Date is None:
        Terminal_Date = Run_End['Start_Date'].max()

    left_name = 'End_Date'

    # Loop over joins until all data used.
    for i in range(Max_Steps):
        print(i,'|',end='')

        suffix_name = '_' + str(i)
        sort_name = 'End_Date' + suffix_name

        Run_Start = pd.merge_asof(
            Run_Start,
            Run_End,
            left_on=left_name,
            right_on='Start_Date',
            suffixes=('', suffix_name),
            direction='forward',
        ).sort_values(by=[sort_name])

        # Finished once every end date joined on in this step was already the terminal date.
        # (`not`/`break` replaces `~flag`, which is only correct for numpy booleans.)
        if (Run_Start[left_name] == Terminal_Date).all():
            break

        left_name = sort_name
    else:
        # The step limit was reached first: the paths returned are truncated.
        print('')
        print('* WARNING: stopped after', Max_Steps, 'steps before every path reached the terminal date.')

    return Run_Start

# Announce that the definition above ran, leave one blank line, then time-stamp the cell.
print('* Build non-overlapping paths function complete', end='\n\n')
Keep_Track()

* Build non-overlapping paths function complete

Executed successfully. Wed 18 May @ 12:38:26



Run

In [8]:
# Chain the selected runs into paths; each row of Results is one path.
Results = Build_Paths(Run_Start=Run_Start, Run_End=Run_End)

# Build_Paths leaves its progress counter on an unfinished line:
# the first print ends that line, the second adds a blank one.
print()
print()
Keep_Track()

* Running ...
0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 |32 |33 |34 |35 |36 |37 |38 |39 |40 |41 |42 |43 |44 |45 |46 |47 |48 |49 |50 |51 |52 |53 |54 |55 |56 |57 |58 |59 |60 |61 |62 |63 |64 |65 |66 |67 |68 |69 |70 |71 |72 |73 |74 |75 |76 |77 |78 |79 |80 |81 |82 |83 |84 |85 |86 |87 |88 |89 |90 |91 |92 |93 |94 |95 |96 |97 |98 |99 |100 |101 |102 |103 |104 |105 |106 |107 |108 |109 |110 |111 |112 |113 |114 |115 |116 |117 |118 |119 |120 |121 |122 |123 |124 |125 |126 |127 |128 |129 |130 |131 |132 |133 |134 |135 |136 |137 |138 |139 |140 |141 |142 |143 |144 |145 |146 |147 |148 |149 |150 |151 |152 |153 |154 |155 |156 |157 |158 |159 |160 |161 |162 |163 |164 |165 |166 |167 |168 |169 |170 |171 |172 |173 |174 |175 |176 |177 |178 |179 |180 |181 |182 |183 |184 |185 |186 |187 |188 |189 |190 |191 |192 |193 |194 |195 |196 |197 |198 |199 |200 |201 |202 |203 |204 |205 |206 |207 |208 |209 |210 |211 |212 |213 |214 |215 |216 |217 |218 |2

In [9]:
# Example start dates: first five rows, restricted to the columns whose name begins with "Start_Date"
Results[[Name for Name in Results.columns if Name.startswith("Start_Date")]].head(5)

Unnamed: 0,Start_Date,Start_Date_0,Start_Date_1,Start_Date_2,Start_Date_3,Start_Date_4,Start_Date_5,Start_Date_6,Start_Date_7,Start_Date_8,...,Start_Date_376,Start_Date_377,Start_Date_378,Start_Date_379,Start_Date_380,Start_Date_381,Start_Date_382,Start_Date_383,Start_Date_384,Start_Date_385
0,2022-04-01 17:01:00,2022-04-01 17:12:00,2022-04-01 17:37:00,2022-04-01 17:41:00,2022-04-01 19:41:00,2022-04-01 21:41:00,2022-04-01 23:41:00,2022-04-02 00:34:00,2022-04-02 00:45:00,2022-04-02 00:51:00,...,2022-05-10 22:09:00,2022-05-10 22:10:00,2022-05-10 22:54:00,2022-05-10 22:56:00,2022-05-10 22:57:00,2022-05-10 23:03:00,2022-05-10 23:05:00,2022-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00
17747,2022-04-01 20:20:00,2022-04-01 22:20:00,2022-04-02 00:20:00,2022-04-02 00:36:00,2022-04-02 00:45:00,2022-04-02 00:51:00,2022-04-02 00:55:00,2022-04-02 02:55:00,2022-04-02 04:55:00,2022-04-02 06:55:00,...,2022-05-10 22:57:00,2022-05-10 23:03:00,2022-05-10 23:05:00,2022-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00
17746,2022-04-01 16:45:00,2022-04-01 17:24:00,2022-04-01 17:39:00,2022-04-01 18:03:00,2022-04-01 20:03:00,2022-04-01 22:03:00,2022-04-02 00:03:00,2022-04-02 00:34:00,2022-04-02 00:45:00,2022-04-02 00:51:00,...,2022-05-10 22:09:00,2022-05-10 22:10:00,2022-05-10 22:54:00,2022-05-10 22:56:00,2022-05-10 22:57:00,2022-05-10 23:03:00,2022-05-10 23:05:00,2022-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00
17745,2022-04-01 16:40:00,2022-04-01 17:24:00,2022-04-01 17:39:00,2022-04-01 18:03:00,2022-04-01 20:03:00,2022-04-01 22:03:00,2022-04-02 00:03:00,2022-04-02 00:34:00,2022-04-02 00:45:00,2022-04-02 00:51:00,...,2022-05-10 22:09:00,2022-05-10 22:10:00,2022-05-10 22:54:00,2022-05-10 22:56:00,2022-05-10 22:57:00,2022-05-10 23:03:00,2022-05-10 23:05:00,2022-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00
17744,2022-04-01 17:02:00,2022-04-01 17:24:00,2022-04-01 17:39:00,2022-04-01 18:03:00,2022-04-01 20:03:00,2022-04-01 22:03:00,2022-04-02 00:03:00,2022-04-02 00:34:00,2022-04-02 00:45:00,2022-04-02 00:51:00,...,2022-05-10 22:09:00,2022-05-10 22:10:00,2022-05-10 22:54:00,2022-05-10 22:56:00,2022-05-10 22:57:00,2022-05-10 23:03:00,2022-05-10 23:05:00,2022-05-10 23:21:00,2042-05-10 23:21:00,2042-05-10 23:21:00


In [10]:
# Example capital returns: first five rows, restricted to the columns whose name begins with "Cap_Return%"
Results[[Name for Name in Results.columns if Name.startswith("Cap_Return%")]].head(5)

Unnamed: 0,Cap_Return%,Cap_Return%_0,Cap_Return%_1,Cap_Return%_2,Cap_Return%_3,Cap_Return%_4,Cap_Return%_5,Cap_Return%_6,Cap_Return%_7,Cap_Return%_8,...,Cap_Return%_376,Cap_Return%_377,Cap_Return%_378,Cap_Return%_379,Cap_Return%_380,Cap_Return%_381,Cap_Return%_382,Cap_Return%_383,Cap_Return%_384,Cap_Return%_385
0,0.008559,0.009363,0.009341,-0.001249,-0.000748,-0.001196,0.008787,0.010238,0.010473,0.00911,...,0.00864,0.032828,0.010002,0.010043,0.010662,0.008519,0.009191,0.008359,,
17747,-0.006101,0.003762,0.008551,0.008933,0.010473,0.00911,-0.028754,0.003779,-0.000343,-0.001892,...,0.010662,0.008519,0.009191,0.008359,,,,,,
17746,0.016687,0.009313,0.00864,0.001146,-0.007534,-0.001935,0.009283,0.010238,0.010473,0.00911,...,0.00864,0.032828,0.010002,0.010043,0.010662,0.008519,0.009191,0.008359,,
17745,0.009119,0.009313,0.00864,0.001146,-0.007534,-0.001935,0.009283,0.010238,0.010473,0.00911,...,0.00864,0.032828,0.010002,0.010043,0.010662,0.008519,0.009191,0.008359,,
17744,0.009672,0.009313,0.00864,0.001146,-0.007534,-0.001935,0.009283,0.010238,0.010473,0.00911,...,0.00864,0.032828,0.010002,0.010043,0.010662,0.008519,0.009191,0.008359,,


In [13]:
# Example max drawdowns: first five rows, restricted to the columns whose name begins with "Max_Drawdown"
Results[[Name for Name in Results.columns if Name.startswith("Max_Drawdown")]].head(5)

Unnamed: 0,Max_Drawdown,Max_Drawdown_0,Max_Drawdown_1,Max_Drawdown_2,Max_Drawdown_3,Max_Drawdown_4,Max_Drawdown_5,Max_Drawdown_6,Max_Drawdown_7,Max_Drawdown_8,...,Max_Drawdown_376,Max_Drawdown_377,Max_Drawdown_378,Max_Drawdown_379,Max_Drawdown_380,Max_Drawdown_381,Max_Drawdown_382,Max_Drawdown_383,Max_Drawdown_384,Max_Drawdown_385
0,-0.001419,-0.002711,-0.000376,-0.012667,-0.012579,-0.014333,-0.005971,-0.00112,-0.000288,-0.00025,...,0.0,-0.028952,0.0,0.0,-0.00716,0.0,-0.0051,-0.003571,,
17747,-0.012457,-0.014346,-0.000572,-0.001119,-0.000288,-0.00025,-0.062549,-0.007723,-0.005433,-0.009137,...,-0.00716,0.0,-0.0051,-0.003571,,,,,,
17746,-0.009311,-0.002701,-0.004402,-0.017736,-0.015767,-0.008782,-0.001865,-0.00112,-0.000288,-0.00025,...,0.0,-0.028952,0.0,0.0,-0.00716,0.0,-0.0051,-0.003571,,
17745,-0.008456,-0.002701,-0.004402,-0.017736,-0.015767,-0.008782,-0.001865,-0.00112,-0.000288,-0.00025,...,0.0,-0.028952,0.0,0.0,-0.00716,0.0,-0.0051,-0.003571,,
17744,-0.001863,-0.002701,-0.004402,-0.017736,-0.015767,-0.008782,-0.001865,-0.00112,-0.000288,-0.00025,...,0.0,-0.028952,0.0,0.0,-0.00716,0.0,-0.0051,-0.003571,,


Create some summary statistics

In [14]:
def _Path_Columns(Prefix):
    """Select the Results columns whose name starts with Prefix (the base column and its suffixed copies)."""
    return Results.loc[:, Results.columns.str.startswith(Prefix)]

# Each figure is computed per row first (across that row's columns), then averaged over all rows.
print('Cap Return Run Mean :', round(_Path_Columns("Cap_Return%").mean(axis=1).mean(), 3))
print('Level Run Mean      :', round(_Path_Columns("Level").mean(axis=1).mean(), 3))
# Life(s) is divided by 60 so the figure is shown in minutes.
print('Run time Mean (min) :', round(_Path_Columns("Life(s)").mean(axis=1).mean() / 60, 2))

print('Sharpe Mean         :', round(_Path_Columns("Sharpe").mean(axis=1).mean(), 4))
# Drawdown takes the minimum (most negative) value in each row rather than the row mean.
print('Drawdown            :', round(_Path_Columns("Max_Drawdown").min(axis=1).mean(), 4))

Cap Return Run Mean : -0.005
Level Run Mean      : 0.963
Run time Mean (min) : 58.48
Sharpe Mean         : 0.3211
Drawdown            : -0.5085
