In [1]:
import pandas as pd
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt
from datetime import datetime
import json
import seaborn as sns

pd.set_option('display.max_columns', None)

In [2]:
data = pd.read_csv('./IntermediateData/setup_data_export.csv')

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61027 entries, 0 to 61026
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      61027 non-null  object 
 1   Time      61027 non-null  object 
 2   Open      61027 non-null  float64
 3   High      61027 non-null  float64
 4   Low       61027 non-null  float64
 5   Close     61027 non-null  float64
 6   Session   61027 non-null  object 
 7   DateTime  61027 non-null  object 
dtypes: float64(4), object(4)
memory usage: 3.7+ MB


In [4]:
data.head()

Unnamed: 0,Date,Time,Open,High,Low,Close,Session,DateTime
0,10/18/2021,09:30:00,15076.5,15092.75,15060.25,15061.0,RTH,09:30:00
1,10/18/2021,09:35:00,15061.25,15088.0,15052.0,15083.25,RTH,09:35:00
2,10/18/2021,09:40:00,15083.0,15117.75,15074.75,15111.25,RTH,09:40:00
3,10/18/2021,09:45:00,15111.0,15135.5,15105.25,15135.0,RTH,09:45:00
4,10/18/2021,09:50:00,15134.75,15144.0,15125.0,15128.0,RTH,09:50:00


# Algo to gather trades and results:
get first 5m candle if high of candle is touched 1 tick above we enter at a position, with the SL being at the low of the 5m candle then we get stopped vice versa for shorts

whatever the stop size is... that is the size for the TP as well... 1:1 RR

after SL or TP, trade is done and we move onto the next day

algo design:

will need the use of vectorized functions, a loop would take too long on this size... and this would also improve my programming skill

perhaps i can group by sessions and then run paramterized vector on that with for loops for each session

if no trade in session (the whole thing is within 5m!!!) then ignore day... if trade but ends up in the 5m candle, just take difference of close and then entry to find the profit of points.

first i can get rid of gbx sessions... we can keep premarket within and hour but stop at 4:10 PM EST. so 8:25AM - 4:10PM.

the group the sessions together

then run above algo

In [5]:
data_grouped = data.groupby(['Date'])

In [6]:
def getHighLow(candle):
    return {'High':candle['High'] + 0.25, 'Low':candle['Low'] - 0.25}

In [7]:
#sesh is the group being passed, in other words it is the RTH session for that day where we are looking for
#5m break trade. 
def getZanekTrade(sesh):
    #get first 5 minute candle
    first_fmc = sesh.iloc[0]
    
    #get limit order placement 
    limits = getHighLow(first_fmc)
    
    #begin for loop to get trade entry and direction, and the value of RR (if RR is 30, and direction is 
    #short, then entry - 30 is SL and entry + 30 is TP)
    entry = {'Location':0, 'Direction':'', 'RR':0, 'entry_time':None}
    
    candle_entry_num = 0
    
    for i, candle in sesh.iterrows():
        if candle['High'] >= limits['High']:
            
            #tagged limit order at ffm candle high
            entry['Location'] = limits['High']
            
            #set direction, since we tagged high first, we long 
            entry['Direction'] = 'Long'
            
            #get RR
            RR = limits['High'] - limits['Low']
            entry['RR'] = RR

            #record entry time
            entry['entry_time'] = candle['DateTime']
            
            #keep track of candle index:
            candle_entry_num = i
            
            break
        elif candle['Low'] <= limits['Low']:
            
            #tagged limit order at ffm candle high
            entry['Location'] = limits['Low']
            
            #set direction, since we tagged high first, we long 
            entry['Direction'] = 'Short'
            
            #get RR
            RR = (limits['Low'] - limits['High']) * -1
            entry['RR'] = RR
            
            #record entry time
            entry['entry_time'] = candle['DateTime']

            #keep track of candle index:
            candle_entry_num = i
            
            break
            
    if candle_entry_num is None:
        print("what the fuck")
        return
    
    # TP is entry + size of candle in the direction of the break, same for SL, except opposite direction, so basically SL is at the low of the candle, 
    # so RR is what we are risking and rewarding
    TP = entry['Location'] + RR if entry['Direction'] == 'Long' else entry['Location'] - RR
    SL = entry['Location'] - RR if entry['Direction'] == 'Long' else entry['Location'] + RR
    
    for i, candle in sesh.loc[candle_entry_num+1:].iterrows():
        if entry['Direction'] == 'Long':
            #if we long, and candle high hits TP, then thats the win
            if (candle['High'] >= TP):
                return (entry, candle['DateTime'], True)
            #if we long, and the candle low hits the SL, thats the loss
            elif(candle['Low'] <= SL):
                return (entry, candle['DateTime'], False)
        #short
        else:
            #if short, the high of the candle reach the SL then we lose trade
            if (candle['High'] >= SL):
                return (entry, candle['DateTime'], False)
            #if short, the low of the candle hits TP then we win the trade
            elif(candle['Low'] <= TP):
                return (entry, candle['DateTime'], True)

        

In [8]:
results = {}

#Loop through the groupedby object of the data 
for name, group in data_grouped:
    results[name] = getZanekTrade(group)

In [9]:
results

{('1/10/2022',): ({'Location': 15311.0,
   'Direction': 'Short',
   'RR': 104.25,
   'entry_time': '09:35:00'},
  '10:10:00',
  True),
 ('1/10/2023',): ({'Location': 11246.25,
   'Direction': 'Long',
   'RR': 108.5,
   'entry_time': '10:10:00'},
  '10:40:00',
  False),
 ('1/10/2024',): ({'Location': 16873.75,
   'Direction': 'Long',
   'RR': 34.5,
   'entry_time': '09:35:00'},
  '09:50:00',
  False),
 ('1/11/2022',): ({'Location': 15578.75,
   'Direction': 'Long',
   'RR': 72.5,
   'entry_time': '09:35:00'},
  '09:40:00',
  False),
 ('1/11/2023',): ({'Location': 11348.25,
   'Direction': 'Long',
   'RR': 57.5,
   'entry_time': '09:35:00'},
  '12:15:00',
  True),
 ('1/11/2024',): ({'Location': 17015.0,
   'Direction': 'Long',
   'RR': 40.25,
   'entry_time': '09:35:00'},
  '09:45:00',
  False),
 ('1/12/2022',): ({'Location': 15926.25,
   'Direction': 'Short',
   'RR': 66.0,
   'entry_time': '09:40:00'},
  '10:05:00',
  False),
 ('1/12/2023',): ({'Location': 11440.5,
   'Direction': 'Sho

results should be good

In [10]:
formatted_data = {
    str(key): value for key, value in results.items()
}

formatted_data = {key.strip("()'"): value for key, value in formatted_data.items()}
formatted_data = {key.strip(",'"): value for key, value in formatted_data.items()}


# Save to a JSON file
with open('./IntermediateData/zanek_initial_results.json', 'w') as f:
    json.dump(formatted_data, f, indent=4)