In [23]:
# external packages
from pathlib import Path
import matplotlib.pyplot as plt 
from importlib import reload
import matplotlib
%matplotlib qt
import numpy as np
matplotlib.style.use('default')
from datetime import datetime
from datetime import timedelta
import pandas as pd
from collections import namedtuple
from collections import deque

In [2]:
# local modules and packages
from ForexMachine.Preprocessing import get_indicators as gi
from ForexMachine import util
reload(gi)
reload(util)

<module 'ForexMachine.util' from 'c:\\github repos\\forexmachine\\ForexMachine\\util.py'>

# load in and add indicators to raw data

In [3]:
# Load the project configuration as a dictionary
# (presumably parsed from a YAML file by util.yaml_to_dict -- confirm in ForexMachine.util).
config = util.yaml_to_dict()
# The config names the model in use; that model's entry holds the indicator settings.
current_model = config['current_model']
indicators = config[current_model]['indicators']
# Show the indicator settings that will be applied.
print(indicators)
# Read the raw EUR/USD candles and add the configured indicator columns.
# NOTE(review): save_to_disk=True presumably also writes the enriched data to disk -- confirm in get_indicators.
data_with_indicators = gi.add_indicators_to_raw(filepath='../Data/RawData/EURUSDi1440.csv', save_to_disk=True, 
                                                config=config)
# Display the first 55 rows as the cell output.
data_with_indicators.head(55)

{'ichimoku': {'tenkan_period': 9, 'kijun_period': 26, 'chikou_period': 26, 'senkou_b_period': 52}, 'rsi': {'periods': 14}}


Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,datetime,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_visual_ichimoku_a,trend_visual_ichimoku_b,chikou_span,momentum_rsi
0,2011.10.14,00:00,1.37765,1.38935,1.37231,1.38772,79276,2011-10-14,,,,1.38083,1.197571,1.202139,1.3539,
1,2011.10.16,00:00,1.3878,1.38856,1.38602,1.38775,1313,2011-10-16,,,,1.38083,1.197571,1.202139,1.34631,
2,2011.10.17,00:00,1.38655,1.39139,1.37254,1.37371,82980,2011-10-17,,,,1.38185,1.197571,1.202139,1.34577,
3,2011.10.18,00:00,1.37371,1.38169,1.36526,1.37513,92915,2011-10-18,,,,1.378325,1.197571,1.202139,1.35238,
4,2011.10.19,00:00,1.37511,1.38688,1.37249,1.37602,89933,2011-10-19,,,,1.378325,1.197571,1.202139,1.3525,
5,2011.10.20,00:00,1.37602,1.38427,1.3656,1.37796,94978,2011-10-20,,,,1.378325,1.197571,1.202139,1.34891,
6,2011.10.21,00:00,1.37798,1.39006,1.37038,1.3891,82195,2011-10-21,,,,1.378325,1.197571,1.202139,1.35045,
7,2011.10.23,00:00,1.38436,1.38711,1.38401,1.3869,336,2011-10-23,,,,1.378325,1.197571,1.202139,1.33412,
8,2011.10.24,00:00,1.38441,1.39561,1.3822,1.39282,79819,2011-10-24,1.380435,,,1.380435,1.197571,1.202139,1.33458,
9,2011.10.25,00:00,1.39282,1.39597,1.38521,1.39066,83660,2011-10-25,1.380615,,,1.380615,1.197571,1.202139,1.32375,


# define helper plotting functions

In [422]:
def _last_row_at_or_before(times, target, begin):
    """Scan ``times`` forward from position ``begin`` and locate ``target``.

    Returns the position of the first value equal to ``target``; if a larger value
    is met first, the position just before it (never below 0); if the scan reaches
    the end, the last position.
    """
    pos = begin
    for pos in range(begin, len(times)):
        if times[pos] == target:
            return pos
        if times[pos] > target:
            # target falls between two rows: use the earlier one, clamped to row 0
            return max(pos - 1, 0)
    return pos


def get_index_range(df,datetime1,datetime2):
    """Return the positional row range of ``df`` that spans two timestamps.

    Parameters
    ----------
    df : DataFrame with a ``'datetime'`` column.  The scan is a forward linear
        search, so the column is assumed to be in ascending order.
    datetime1, datetime2 : values comparable with the column's entries
        (``datetime1`` is the start, ``datetime2`` the end).

    Returns
    -------
    (i1, i2) : tuple of int
        Positions (usable with ``df.iloc``) of the rows at, or immediately before,
        ``datetime1`` and ``datetime2``.  Timestamps outside the data are clamped to
        the first / last row.  ``(-1, -1)`` is returned when ``datetime1`` is later
        than ``datetime2`` or when ``df`` has no rows.
    """
    # Work on a plain list: access is positional, so the result does not depend on
    # the frame's index labels (a sliced or filtered frame works too).
    times = df['datetime'].tolist()
    if not times or not (datetime1 <= datetime2):
        return -1, -1
    i1 = _last_row_at_or_before(times, datetime1, 0)
    # the end of the range can never lie before its start, so resume the scan at i1
    i2 = _last_row_at_or_before(times, datetime2, i1)
    return i1, i2

def show_data_from_range(df, date1, date2, main_indicators = [], sub_indicators = [], visualize_crosses=False, crosses=None):
    """Plot the close price and the chosen indicators for the rows between two dates.

    ``date1`` and ``date2`` are strings parsed with ``pd.Timestamp.fromisoformat``,
    i.e. ISO 8601 such as ``'2012-01-31'``.  Every name in ``main_indicators`` is
    drawn on top of the price chart; every name in ``sub_indicators`` gets its own
    chart underneath.  The recognised names are ``'ichimoku'`` and ``'rsi'``.
    ``visualize_crosses`` and ``crosses`` are handed on to ``add_ichimoku_to_plot``.

    Prints ``'invalid dates'`` and returns without plotting when ``get_index_range``
    reports a negative position.
    """
    first_ts = pd.Timestamp.fromisoformat(date1)
    last_ts = pd.Timestamp.fromisoformat(date2)
    start, stop = get_index_range(df, first_ts, last_ts)
    if start < 0 or stop < 0:
        print('invalid dates')
        return

    window = df.iloc[start:stop+1]
    chart_count = 1 + len(sub_indicators)

    # relative heights: the price chart first, then one entry per sub chart
    if chart_count == 1:
        height_ratios = [1]
    elif chart_count == 2:
        height_ratios = [3, 2 / (chart_count-1)]
    else:
        height_ratios = [1] + [1 / (chart_count-1)] * (chart_count-1)

    fig, axes = plt.subplots(chart_count,1,sharex='col', gridspec_kw={'height_ratios':height_ratios})
    fig.tight_layout(pad=1.8, h_pad=0.0)

    # with a single chart plt.subplots hands back one Axes instead of an array
    if chart_count > 1:
        top_ax, bottom_ax = axes[0], axes[len(axes)-1]
    else:
        top_ax = bottom_ax = axes
    top_ax.plot(window.Close.to_list(), label='Close',color='brown')

    def draw_ichimoku(ax, frame):
        return add_ichimoku_to_plot(ax, frame, visualize_crosses, crosses)

    def draw_rsi(ax, frame):
        return add_rsi_to_plot(ax, frame)

    renderers = {'ichimoku': draw_ichimoku, 'rsi': draw_rsi}

    for name in main_indicators:
        renderers[name](top_ax, window)

    for offset, name in enumerate(sub_indicators, start=1):
        renderers[name](axes[offset], window)

    # one tick per row, but only every other tick carries a label
    bottom_ax.set_xticks(np.arange(len(window)))
    x_labels = [stamp.strftime('%Y-%m-%d %H:%M') for stamp in window['datetime']]
    x_labels[1::2] = [''] * len(x_labels[1::2])
    bottom_ax.set_xticklabels(x_labels,rotation=80, wrap=True)

    for ax in (axes if chart_count > 1 else [top_ax]):
        ax.legend()

    plt.show()

In [408]:
"""
Functions for adding indicators to a matplotlib chart
"""

def add_ichimoku_to_plot(ax, df, visualize_crosses = False, crosses=None):
    """Draw the Ichimoku lines, the cloud and (optionally) signal markers on ``ax``.

    Parameters
    ----------
    ax : axes-like object providing ``plot``, ``fill_between``, ``axvline`` and ``text``.
    df : DataFrame holding ``Close``, ``trend_visual_ichimoku_a``,
        ``trend_visual_ichimoku_b``, ``trend_ichimoku_conv``, ``trend_ichimoku_base``
        and ``chikou_span``.  When ``visualize_crosses`` is true it must also hold,
        for every requested cross ``<name>``, the columns ``<name>_bull_strength``,
        ``<name>_bear_strength``, ``<name>_bull_length`` and ``<name>_bear_length``,
        and for ``'kumo_breakout'`` the columns ``cloud_breakout_bull`` and
        ``cloud_breakout_bear``.
    visualize_crosses : bool
        Draw a vertical line and a text label on every bar where a signal fires.
    crosses : iterable of str, optional
        Signals to mark, out of ``'tk_cross'``, ``'tk_price_cross'``,
        ``'senkou_cross'``, ``'chikou_cross'`` and ``'kumo_breakout'``.  All five are
        marked when this is empty or ``None``.

    Rows are plotted against their position (0..len(df)-1), not the frame's index.
    Returns ``None``.
    """
    xs = np.arange(len(df))
    span_a = df.trend_visual_ichimoku_a
    span_b = df.trend_visual_ichimoku_b
    ax.plot(span_a.to_list(), label='Senkou-Span a',linestyle='--',color='green')
    ax.plot(span_b.to_list(), label='Senkou-Span b',linestyle='--',color='red')
    # shade the cloud green where span a is on top and red otherwise
    ax.fill_between(xs, span_a, span_b, alpha=0.2, color='green', where=(span_a > span_b))
    ax.fill_between(xs, span_a, span_b, alpha=0.2, color='red', where=(span_a <= span_b))
    ax.plot(df.trend_ichimoku_conv.to_list(), label='Tenkan-Sen (conversion)',color='cyan')
    ax.plot(df.trend_ichimoku_base.to_list(), label='Kijun Sen (base)',color='blue')
    ax.plot(df.chikou_span.to_list(), label='chikou span',linestyle=':',color='orange')

    if not visualize_crosses:
        return

    colors = {
        'tk_cross': 'red',
        'tk_price_cross': 'green',
        'senkou_cross': 'blue',
        'chikou_cross': 'orange',
        'kumo_breakout': 'purple'
    }
    # drawing order of the cross signals and the title used in their labels
    cross_titles = (
        ('tk_cross', 'TK Cross'),
        ('tk_price_cross', 'TK Price Cross'),
        ('senkou_cross', 'Senkou Cross'),
        ('chikou_cross', 'Chikou Cross'),
    )
    selected = set(crosses) if crosses else set(colors)

    close = df.Close.to_list()

    # (color, label heading, strengths, lengths) for every requested cross signal;
    # columns of crosses that were not requested are never touched
    cross_signals = []
    for key, title in cross_titles:
        if key not in selected:
            continue
        for side, arrow in (('bull', '^'), ('bear', '_')):
            cross_signals.append((
                colors[key],
                f'{arrow} {title} {side.capitalize()}',
                df[f'{key}_{side}_strength'].to_list(),
                df[f'{key}_{side}_length'].to_list(),
            ))

    # (label heading, per-row flags) for the cloud breakouts
    breakout_signals = []
    if 'kumo_breakout' in selected:
        breakout_signals = [
            ('^ Kumo Breakout Bullish', df['cloud_breakout_bull'].to_list()),
            ('_ Kumo Breakout Bearish', df['cloud_breakout_bear'].to_list()),
        ]

    for x in range(len(df)):
        # collect every signal firing on this bar, in drawing order
        labels = []
        for color, heading, strengths, lengths in cross_signals:
            strength = strengths[x]
            if not pd.isna(strength) and strength > 0:
                labels.append((color, f'{heading}\nstrength={strength}\nlength={lengths[x]}'))
        for heading, fired in breakout_signals:
            if fired[x]:
                labels.append((colors['kumo_breakout'], heading))

        for slot, (color, text) in enumerate(labels):
            ax.axvline(x = x, color = color)
            # Every label already drawn on this bar adds three trailing newlines to
            # the next one so the labels stack instead of overprinting each other.
            # This now applies to the kumo breakout labels as well.
            ax.text(x = x, y = close[x], color = color, s = text + '\n' * 3 * slot)
        

def add_rsi_to_plot(ax, df):
    """Draw the ``momentum_rsi`` column on ``ax`` with a shaded 30-70 band.

    The RSI line is plotted against row position; grey guide lines mark the 30 and
    70 levels, the area between them is shaded, and the y axis is fixed to 15-85
    with ticks every 20 from 20 to 80.
    """
    n_points = len(df)
    lower_band = [30]*n_points
    upper_band = [70]*n_points

    ax.plot(df.momentum_rsi.to_list(), label='RSI', color='purple')
    for band in (lower_band, upper_band):
        ax.plot(band,color='gray',alpha=0.5)
    ax.fill_between(np.arange(n_points),lower_band,upper_band,color='gray',alpha=0.2)

    ax.set_ylim(15,85)
    ax.set_yticks(np.arange(20,100,20))

# define helper functions and classes for generating features

In [6]:
def add_features(df, inplace=False, chikou_period=26, cross_length_limit=np.inf):
    """Add temporal and Ichimoku signal columns to ``df``.

    Every row is passed through a ``FeatuteGenerator`` and the results are collected
    into new columns:

    * ``quarter``, ``day_of_week``, ``month``, ``day``, ``minute``, ``hour``, ``year``
    * ``is_price_above_cb_lines``, ``is_price_above_cloud``,
      ``is_price_inside_cloud``, ``is_price_below_cloud``
    * ``cloud_breakout_bull`` / ``cloud_breakout_bear`` and
      ``ticks_since_cloud_breakout_bull`` / ``..._bear``
    * for each of ``tk_cross``, ``tk_price_cross``, ``senkou_cross`` and
      ``chikou_cross``, and for both ``bull`` and ``bear``:
      ``<cross>_<side>_strength`` / ``<cross>_<side>_length`` (0 on rows without that
      signal), ``<cross>_most_recent_<side>_strength`` /
      ``<cross>_most_recent_<side>_length`` (carried forward from the last signal)
      and ``<cross>_ticks_since_<side>`` (rows since the last signal).

    The "since" / "most recent" / per-row cross columns hold ``None`` until the
    first signal of that kind has been seen.

    Parameters
    ----------
    df : DataFrame with the columns ``FeatuteGenerator`` reads (including
        ``datetime``, ``Close`` and the Ichimoku indicator columns).
    inplace : bool
        When false (default) the columns are added to a copy and ``df`` is untouched.
    chikou_period : int
        Forwarded to ``FeatuteGenerator``; defaults to the previously hard-coded 26.
    cross_length_limit : number
        Forwarded to ``FeatuteGenerator.get_ichimoku_features``; the default of
        infinity (the previous behaviour) places no limit on cross length.

    Returns
    -------
    DataFrame
        ``df`` itself when ``inplace`` is true, otherwise the enriched copy.
    """
    temporal_fields = ('quarter', 'day_of_week', 'month', 'day', 'minute', 'hour', 'year')
    position_flags = ('is_price_above_cb_lines', 'is_price_above_cloud',
                      'is_price_inside_cloud', 'is_price_below_cloud')
    cross_names = ('tk_cross', 'tk_price_cross', 'senkou_cross', 'chikou_cross')
    sides = ('bull', 'bear')
    # series tracked per (cross, side) and the template of the column each one fills;
    # the order here fixes the order of the resulting columns
    cross_columns = (
        ('most_recent_strength', '{cross}_most_recent_{side}_strength'),
        ('strength', '{cross}_{side}_strength'),
        ('ticks_since', '{cross}_ticks_since_{side}'),
        ('most_recent_length', '{cross}_most_recent_{side}_length'),
        ('length', '{cross}_{side}_length'),
    )

    temporal = {field: [] for field in temporal_fields}
    price_position = {flag: [] for flag in position_flags}
    breakout = {side: [] for side in sides}
    ticks_since_breakout = {side: [] for side in sides}
    breakout_seen = dict.fromkeys(sides, False)
    tracks = {
        (cross, side): {series: [] for series, _ in cross_columns}
        for cross in cross_names
        for side in sides
    }
    cross_seen = dict.fromkeys(tracks, False)

    data = df.values
    feature_indices = {column: idx for idx, column in enumerate(df.columns)}

    fg = FeatuteGenerator(data, feature_indices, chikou_period=chikou_period)
    for i in range(len(data)):
        # temporal features
        moment = fg.get_temporal_features(i)
        for field in temporal_fields:
            temporal[field].append(getattr(moment, field))

        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
        ichimoku = fg.get_ichimoku_features(i, cross_length_limit=cross_length_limit)
        for flag in position_flags:
            price_position[flag].append(ichimoku[flag])

        # kumo breakouts: flag plus number of rows since the last breakout
        for side in sides:
            fired = ichimoku[f'cloud_breakout_{side}']
            breakout[side].append(fired)
            if fired:
                breakout_seen[side] = True
            elapsed = ticks_since_breakout[side]
            if not breakout_seen[side]:
                elapsed.append(None)
            elif fired:
                elapsed.append(0)
            else:
                elapsed.append(elapsed[-1] + 1)

        # remaining crosses: each result is (bull strength, bear strength, cross length)
        for cross in cross_names:
            bull_strength, bear_strength, cross_length = ichimoku[cross]
            for side, strength in zip(sides, (bull_strength, bear_strength)):
                track = tracks[cross, side]
                fired = strength > 0
                if fired:
                    cross_seen[cross, side] = True

                if not cross_seen[cross, side]:
                    # nothing to report before the first signal of this kind
                    for series in track.values():
                        series.append(None)
                elif fired:
                    track['most_recent_strength'].append(strength)
                    track['strength'].append(strength)
                    track['ticks_since'].append(0)
                    track['most_recent_length'].append(cross_length)
                    track['length'].append(cross_length)
                else:
                    # no signal on this row: carry the last one forward
                    track['most_recent_strength'].append(track['most_recent_strength'][-1])
                    track['strength'].append(0)
                    track['ticks_since'].append(track['ticks_since'][-1] + 1)
                    track['most_recent_length'].append(track['most_recent_length'][-1])
                    track['length'].append(0)

    if not inplace:
        df = df.copy()

    for field in temporal_fields:
        df[field] = temporal[field]
    for flag in position_flags:
        df[flag] = price_position[flag]
    for side in sides:
        df[f'cloud_breakout_{side}'] = breakout[side]
    for side in sides:
        df[f'ticks_since_cloud_breakout_{side}'] = ticks_since_breakout[side]

    for cross in cross_names:
        for series, template in cross_columns:
            for side in sides:
                df[template.format(cross=cross, side=side)] = tracks[cross, side][series]

    return df

In [7]:
class FeatuteGenerator:
    def __init__(self, data,feature_indices,chikou_period=26):
        """Set up a generator that derives per-row features from ``data``.

        Parameters
        ----------
        data : row-indexable table; the methods read cells as ``data[i][column_position]``.
        feature_indices : mapping from column name to that column's position in a row.
        chikou_period : number of rows the chikou-related checks look back
            (``get_ichimoku_features`` reads rows at ``i - chikou_period``).
        """
        self.data = data
        self.feature_indices = feature_indices
        # RSI divergence state; in the visible code these are referenced only by the
        # commented-out check_rsi_divergence -- TODO confirm whether they are still needed
        self.rsi_divergence_range = 30
        self.last_rsi_divergence = 0 # 0 - None, 1 - bearish, 2 - hidden bearish, 3 - bullish, 4 - hidden bullish
        self.rsi_highs = deque()
        self.rsi_lows = deque()
        # NOTE(review): presumably running state for the cross-detection helpers, which are
        # not visible from here -- confirm
        self.cross_lengths = {}
        self.price_entered_tk_region_from_top = False
        self.price_entered_tk_region_from_bot = False
        # record type returned by get_temporal_features
        self.temporal_features = namedtuple('temporal_features', 'quarter year month day day_of_week hour minute')
        self.chikou_period = chikou_period
        # first row index at which get_ichimoku_features evaluates crosses; computed last
        # because the helper may rely on the attributes assigned above
        self.safe_start_idx = self._get_safe_ichimoku_idx()
    
    def get_temporal_features(self,i):
        dt = self.data[i][self.feature_indices['datetime']]
        
        features = self.temporal_features(quarter=dt.quarter, year=dt.year, month=dt.month, day=dt.day,
                                          day_of_week=dt.dayofweek, hour=dt.hour, minute=dt.minute)
        return features
    
#     def check_rsi_divergence(self,index):
#         momentum_rsi_i = self.feature_indices['momentum_rsi']
#         if not pd.isna(self.data[index][momentum_rsi_i]):
#             rsi1 = self.data[index][momentum_rsi_i]
#             rsi2 = self.data[index-1][momentum_rsi_i]
#             rsi3 = self.data[index-2][momentum_rsi_i]
#             if rsi1 < rsi2 and rsi3 < rsi2:
#                 self.rsi_highs.appendleft((index-1,rsi2))
#                 for high in self.rsi_highs:
#                     if high[0] == index-1:
#                         continue
                    
#             elif rsi1 > rsi2 and rsi3 > rsi2:
#                 self.rsi_lows.appendleft((index-1,rsi2))

#             if len(self.rsi_highs) > 0:
#                 if self.rsi_highs[0][0] < index - self.rsi_divergence_range:
#                     self.rsi_highs.pop()
#             if len(self.rsi_lows) > 0:
#                 if self.rsi_lows[0][0] < index - self.rsi_divergence_range:
#                     self.rsi_lows.pop()           
    
    def get_ichimoku_features(self, i, cross_length_limit = 1):
        is_price_above_cb_lines = None
        is_price_above_cloud = None
        is_price_inside_cloud = None
        is_price_below_cloud = None
        cloud_top = None
        cloud_bottom = None

        # cross signals represented as tuples: (bullish strength, bearish strength, cross length)
        # - cross signal strength indicated by 0, 1, 2, 3 for none, weak, neutral, strong
        #    or just 0, 1, 3 for none, weak, strong
        # - cross length is just the number of ticks the cross occured over
        tk_cross = (0,0,0)
        tk_price_cross = (0,0,0)
        senkou_cross = (0,0,0)
        chikou_cross = (0,0,0)
        cloud_breakout_bull = False
        cloud_breakout_bear = False
        
        close = self.feature_indices['Close']
        trend_visual_ichimoku_a = self.feature_indices['trend_visual_ichimoku_a']
        trend_visual_ichimoku_b = self.feature_indices['trend_visual_ichimoku_b']
        trend_ichimoku_conv = self.feature_indices['trend_ichimoku_conv']
        trend_ichimoku_base = self.feature_indices['trend_ichimoku_base']
        chikou_span = self.feature_indices['chikou_span']
        
        cloud_top, cloud_bottom = self._get_top_and_bottom_line_idx(trend_visual_ichimoku_a,trend_visual_ichimoku_b,i)

        if not pd.isna(self.data[i][trend_ichimoku_conv]) and not pd.isna(self.data[i][trend_ichimoku_base]):
            if self.data[i][close] > self.data[i][trend_ichimoku_conv] and self.data[i][close] > self.data[i][trend_ichimoku_base]:
                is_price_above_cb_lines = True
            else:
                is_price_above_cb_lines = False

            if self.data[i][close] > self.data[i][cloud_bottom] and self.data[i][close] < self.data[i][cloud_top]:
                is_price_inside_cloud = True
                is_price_above_cloud = False
                is_price_below_cloud = False
            else:
                is_price_inside_cloud = False
                if self.data[i][close] <= self.data[i][cloud_bottom]:
                    is_price_above_cloud = False
                    is_price_below_cloud = True
                else:
                    is_price_above_cloud = True
                    is_price_below_cloud = False
        
        ### check for crosses
        
        if i >= self.safe_start_idx:
            
            ### tk cross
            
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('tk_cross', trend_ichimoku_conv,trend_ichimoku_base,i)
            
            # price cross clean through both tk region (cross == 2), or price cross through both 
            # tk region over limited amout of ticks (cross == 3 and length <= cross_length_limit)
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                
                # bullish
                if top_line_i == trend_ichimoku_conv:
                    if self._is_line_between_others(top_line_i,cloud_top,cloud_bottom,i) \
                            and self._is_line_between_others(bottom_line_i,cloud_top,cloud_bottom,i):
                        tk_cross = (2,0,length)
                    elif self.data[i][bottom_line_i] >= self.data[i][cloud_top]:
                        tk_cross = (3,0,length)
                    else:
                        tk_cross = (1,0,length)
                # bearish
                elif top_line_i == trend_ichimoku_base:
                    if self._is_line_between_others(top_line_i,cloud_top,cloud_bottom,i) \
                            and self._is_line_between_others(bottom_line_i,cloud_top,cloud_bottom,i):
                        tk_cross = (0,2,length)
                    elif self.data[i][top_line_i] <= self.data[i][cloud_bottom]:
                        tk_cross = (0,3,length)
                    else:
                        tk_cross = (0,1,length)
                else:
                    print('weird 5:', self.data[i][self.feature_indices['datetime']])
                
            ### tk price cross
            
            cross_res = self._get_cross_and_length_regions('tk_price_cross', trend_ichimoku_conv, trend_ichimoku_base,
                                                            close, close, i)
            cross, length, first_line, second_line, third_line, fourth_line = cross_res
            
            if cross == 2 or (cross == 3 and length <= cross_length_limit):
                
                # "It’s a noise zone when price is in the Cloud"
                #  https://www.tradeciety.com/the-complete-ichimoku-trading-guide-how-to-use-the-ichimoku-indicator/
                
                # bullish 
                if first_line == close:
                    if self.data[i][close] >= self.data[i][cloud_top]:
                        tk_price_cross = (3,0,length)
                    elif self.data[i][close] <= self.data[i][cloud_bottom]:
                        tk_price_cross = (1,0,length)
                # bearish
                elif fourth_line == close:
                    if self.data[i][close] >= self.data[i][cloud_top]:
                        tk_price_cross = (0,1,length)
                    elif self.data[i][close] <= self.data[i][cloud_bottom]:
                        tk_price_cross = (0,3,length)
            elif cross == 3 and length>cross_length_limit:
                print(f'cross type = {cross}, cross length = {length}, {self.data[i][self.feature_indices["datetime"]]}')
            
            ### cloud (senkou) cross
            
            # As the Senkou Spans are projected forward, the cross that triggers this signal will be 26 days ahead of the 
            # price and, hence, the actual date that the signal occurs.  The strength of the signal is determined by the 
            # relationship of the price on the date of the signal (not the trigger) to the Kumo (Cloud)
            # - https://www.ichimokutrader.com/signals.html
            
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('cloud_cross', trend_visual_ichimoku_a, trend_visual_ichimoku_b,i)
            
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                
                # note you are checking close at index of 26 before current, read comment above
                chikou_period = self.chikou_period
                
                old_cloud_top, old_cloud_bottom = self._get_top_and_bottom_line_idx(trend_visual_ichimoku_a,
                                                                                    trend_visual_ichimoku_b,
                                                                                    i-chikou_period)
                
                # bullish
                if top_line_i == trend_visual_ichimoku_a:
                    if self._is_line_between_others(close,old_cloud_top,old_cloud_bottom,i-chikou_period):
                        senkou_cross = (2,0,length)
                    elif self.data[i-chikou_period][close] >= self.data[i-chikou_period][old_cloud_top]:
                        senkou_cross = (3,0,length)
                    else:
                        senkou_cross = (1,0,length)
                # bearish
                elif top_line_i == trend_visual_ichimoku_b:
                    if self._is_line_between_others(close,old_cloud_top,old_cloud_bottom,i-chikou_period):
                        senkou_cross = (0,2,length)
                    elif self.data[i-chikou_period][close] <= self.data[i-chikou_period][old_cloud_bottom]:
                        senkou_cross = (0,3,length)
                    else:
                        senkou_cross = (0,1,length)
                else:
                    print('weird 5:', self.data[i][self.feature_indices['datetime']])
                
            ### chikou span cross

            # Note (1) that the Chikou Span must be rising when it crosses to above the price for a bull signal 
            # and falling when it crosses to below for a bear signal; just crossing the price alone is not 
            # sufficient to trigger the signal. (2) As the Chikou Span is the closing price shifted into the past, 
            # the cross that triggers this signal will be 26 days behind the price and, hence, the actual date 
            # that the signal occurs.The strength of the signal is determined by the relationship of the price 
            # on the date of the signal (not the trigger) to the Kumo (Cloud).
            # - https://www.ichimokutrader.com/signals.html
            
            # note you are checking for cross at index of 26 before current because that is where the chikou
            # is, read comment above
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('chikou_cross', chikou_span, close, i-self.chikou_period)
            
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                # bullish
                if top_line_i == chikou_span:
                    if self._is_line_between_others(close, cloud_top, cloud_bottom, i):
                        chikou_cross = (2,0,length)
                    elif self.data[i][close] > self.data[i][cloud_top]:
                        chikou_cross = (3,0,length)
                    else:
                        chikou_cross = (1,0,length)
                # bearish
                elif top_line_i == close:
                    if self._is_line_between_others(close, cloud_top, cloud_bottom, i):
                        chikou_cross = (0,2,length)
                    elif self.data[i][close] < self.data[i][cloud_bottom]:
                        chikou_cross = (0,3,length)
                    else:
                        chikou_cross = (0,1,length)
                else:
                    print('weird 6:', self.data[i][self.feature_indices['datetime']])
            
            ### kumo (cloud) breakout
            
            cross_res = self._get_cross_and_length_regions('kumo_breakout', cloud_top, cloud_bottom,
                                                            close, close, i)
            cross, length, first_line, second_line, third_line, fourth_line = cross_res
            
            # The Kumo Breakout signal occurs when the price leaves or crosses the Kumo (Cloud), which is why
            # we also want to check for if cross == 4 (end of overlap but not a cross)
            # - https://www.ichimokutrader.com/signals.html
            if cross == 2 or cross == 3 or cross == 4:
                # bullish 
                if first_line == close:
                    cloud_breakout_bull = True
                # bearish
                elif fourth_line == close:
                    cloud_breakout_bear = True
        
        features = {
            'is_price_above_cb_lines': is_price_above_cb_lines,
            'is_price_above_cloud': is_price_above_cloud,      
            'is_price_inside_cloud': is_price_inside_cloud,   
            'is_price_below_cloud': is_price_below_cloud,   
            'cloud_top': cloud_top,   
            'cloud_bottom': cloud_bottom,   
            'tk_cross': tk_cross,   
            'tk_price_cross': tk_price_cross,   
            'senkou_cross': senkou_cross,   
            'chikou_cross': chikou_cross,   
            'cloud_breakout_bull': cloud_breakout_bull,
            'cloud_breakout_bear': cloud_breakout_bear
        }
        
        return features
    
    def _get_safe_ichimoku_idx(self):
        exluded_features = {'chikou_span'}
        safe_idx = None
        
        for i in range(len(self.data)):
            nan_in_row = False
            
            for feature in self.feature_indices:
                if feature in exluded_features:
                    continue
                    
                feature_i = self.feature_indices[feature]
                if isinstance(self.data[i][feature_i], float) and np.isnan(self.data[i][feature_i]):
                    safe_idx = None
                    nan_in_row = True
                    break
            
            if not nan_in_row and not safe_idx:
                safe_idx = i
        
        # need to add offset period of chikou and senkou spans to safely determine senkou (cloud) cross strength
        return safe_idx + self.chikou_period
    
    def _get_top_and_bottom_line_idx(self,line1_i,line2_i,i):
        """
        line1_i is top if line values are equal
        """
        top_line_i = line1_i
        bottom_line_i = line2_i
        if self.data[i][line1_i] < self.data[i][line2_i]:
            top_line_i = line2_i
            bottom_line_i = line1_i
        return top_line_i, bottom_line_i
    
    def _is_line_between_others(self,target_line_i,top_line_i,bottom_line_i,i):
        if self.data[i][target_line_i] > self.data[i][bottom_line_i] \
            and self.data[i][target_line_i] < self.data[i][top_line_i]:
            return True
        return False
    
    def _get_cross_and_length_regions(self, cross_name, r1_line1, r1_line2, r2_line1, r2_line2, i):
        """
        cross type can be: no cross '=' (0), start of overlap '>' (1), full cross 'X' (2), end of cross '<' (3),
            or end of overlap w/ no cross (4)
        """
        
        old_r1_top, old_r1_bot = self._get_top_and_bottom_line_idx(r1_line1,r1_line2,i-1)
        old_r2_top, old_r2_bot = self._get_top_and_bottom_line_idx(r2_line1,r2_line2,i-1)
        
        r1_top, r1_bot = self._get_top_and_bottom_line_idx(r1_line1,r1_line2,i)
        r2_top, r2_bot = self._get_top_and_bottom_line_idx(r2_line1,r2_line2,i)
        
        # defines lines from top to bottom between both regions
        sorted_regions_lines = sorted([(line, self.data[i][line]) for line in [r1_top,r1_bot,r2_top,r2_bot]],
                                     key = lambda line_tuple: line_tuple[1], reverse=True)
        first_line, second_line, third_line, fourth_line = sorted_regions_lines
        
        ### check for no cross
        
        old_top_region_bot = None
    
        # region 1 is fully above region 2
        if self.data[i-1][old_r1_bot] > self.data[i-1][old_r2_top]:
            old_top_region_bot = old_r1_bot
            if self.data[i][r1_bot] > self.data[i][r2_top]:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        # region 2 is fully above region 1
        elif self.data[i-1][old_r2_bot] > self.data[i-1][old_r1_top]:
            old_top_region_bot = old_r2_bot
            if self.data[i][r2_bot] > self.data[i][r1_top]:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        ### check for full cross
        
        # region 1 crossed to below region 2
        if self.data[i-1][old_r1_bot] > self.data[i-1][old_r2_top] \
                and self.data[i][r1_top] < self.data[i][r2_bot]:
            return 2, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        # region 2 crossed to below region 1
        elif self.data[i-1][old_r2_bot] > self.data[i-1][old_r1_top] \
                and self.data[i][r2_top] < self.data[i][r1_bot]:
            return 2, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        ### check for start of overlap
        
        top_region_top = top_region_bot = bot_region_top = bot_region_bot = None
        # region 1 is highest
        if self.data[i][r1_top] > self.data[i][r2_top]:
            top_region_top = r1_top
            top_region_bot = r1_bot
            bot_region_top = r2_top
            bot_region_bot = r2_bot
        # region 2 is highest
        else:
            top_region_top = r2_top
            top_region_bot = r2_bot
            bot_region_top = r1_top
            bot_region_bot = r1_bot

        # checking for start of overlap
        if cross_name not in self.cross_lengths:  
            # if the bottom line of the top region is still not defined then just consider no cross 
            if not old_top_region_bot:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]  
            else:
                # one region is beginning to intertwine or completely swallow the other, regardless this counts
                # as the start of an overlap
                if self.data[i][bot_region_top] <= self.data[i][top_region_top] \
                        and self.data[i][bot_region_top] >= self.data[i][top_region_bot]:
                    self.cross_lengths[cross_name] = (0, old_top_region_bot)
                    return 1, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
                print('weird 11:', self.data[i][self.feature_indices['datetime']])
        else:
            # check for continuation of overlap
            if self.data[i][bot_region_top] <= self.data[i][top_region_top] \
                    and self.data[i][bot_region_top] >= self.data[i][top_region_bot]:
                self.cross_lengths[cross_name] = (self.cross_lengths[cross_name][0] + 1, self.cross_lengths[cross_name][1])
                return 0, self.cross_lengths[cross_name][0], first_line[0], second_line[0], third_line[0], fourth_line[0]
            # otherwise, 1 region must be completely above the other
            else:
                original_top_region_bot = self.cross_lengths[cross_name][1]
                res = None
                
                # check for end of cross
                if original_top_region_bot != top_region_bot and original_top_region_bot != top_region_top:
                    res = 3
                # otherwise, end of overlap w/ no cross
                else:
                    res = 4
                
                cross_length = self.cross_lengths[cross_name][0]
                del self.cross_lengths[cross_name]
                return res, cross_length, first_line[0], second_line[0], third_line[0], fourth_line[0]

            
    def _get_cross_and_length(self, cross_name, line_index1, line_index2, index):
        """
        cross type can be: no cross '=' (0), start of overlap '>' (1), full cross 'X' (2), end of cross '<' (3),
            or end of overlap w/ no cross (4)
        """
        
        # remember that if lines are of equal values the first line argument to _get_top_and_bottom_line_idx()
        # will be returned as the top line
        old_top_line_i, old_bottom_line_i = self._get_top_and_bottom_line_idx(line_index1,line_index2,index - 1)
        top_line_i, bottom_line_i = self._get_top_and_bottom_line_idx(line_index1,line_index2,index)
        
        ## check for no cross
        
        if self.data[index][line_index1] != self.data[index][line_index2] \
                and self.data[index-1][line_index1] != self.data[index-1][line_index2] \
                and old_top_line_i == top_line_i \
                and bottom_line_i == old_bottom_line_i:
            return 0, 0, top_line_i, bottom_line_i

        ## check for full cross
        
        if old_top_line_i != top_line_i \
                and self.data[index-1][old_top_line_i] > self.data[index-1][old_bottom_line_i] \
                and self.data[index][old_top_line_i] < self.data[index][old_bottom_line_i]:
            return 2, 0, top_line_i, bottom_line_i
        
        ##check for start of overlap
        
        if cross_name not in self.cross_lengths:
            if self.data[index][line_index1] == self.data[index][line_index2]:
                self.cross_lengths[cross_name] = (0, old_top_line_i,self.data[index][self.feature_indices['datetime']])
                return 1, 0, top_line_i, bottom_line_i
            print('weird 1:', self.data[index][self.feature_indices['datetime']])
        else:
            
            ## check for continuation of overlap
            
            if self.data[index][line_index1] == self.data[index][line_index2]:
                self.cross_lengths[cross_name] = (self.cross_lengths[cross_name][0] + 1, self.cross_lengths[cross_name][1]
                                                  ,self.cross_lengths[cross_name][2])
                return 0, self.cross_lengths[cross_name][0], top_line_i, bottom_line_i
            else:
                cross_old_top_line_i = self.cross_lengths[cross_name][1]
                res = None

                ## check for end of cross
                
                if cross_old_top_line_i != top_line_i:
                    res = 3
                # otherwise, end of overlap w/ no cross
                else:
                    res = 4 

                cross_length = self.cross_lengths[cross_name][0]
                del self.cross_lengths[cross_name]
                return res,cross_length, top_line_i, bottom_line_i

In [8]:
# Run add_features over the indicator frame and preview the last 30 rows; the output below shows the
# derived signal columns it produces (e.g. the chikou_cross_* strength / ticks_since / length columns).
data_with_ichi = add_features(data_with_indicators)
data_with_ichi.tail(30)

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,datetime,trend_ichimoku_conv,trend_ichimoku_base,...,chikou_cross_most_recent_bull_strength,chikou_cross_most_recent_bear_strength,chikou_cross_bull_strength,chikou_cross_bear_strength,chikou_cross_ticks_since_bull,chikou_cross_ticks_since_bear,chikou_cross_most_recent_bull_length,chikou_cross_most_recent_bear_length,chikou_cross_bull_length,chikou_cross_bear_length
2224,2020.04.27,00:00,1.08218,1.086,1.08113,1.08284,87814,2020-04-27,1.085885,1.08913,...,1.0,3.0,0.0,0.0,1.0,17.0,0.0,0.0,0.0,0.0
2225,2020.04.28,00:00,1.08278,1.08883,1.08099,1.08187,90036,2020-04-28,1.081905,1.093515,...,1.0,3.0,0.0,0.0,2.0,18.0,0.0,0.0,0.0,0.0
2226,2020.04.29,00:00,1.08186,1.08856,1.0817,1.08694,89936,2020-04-29,1.081185,1.093695,...,1.0,3.0,0.0,0.0,3.0,19.0,0.0,0.0,0.0,0.0
2227,2020.04.30,00:00,1.08692,1.09725,1.08331,1.0955,114202,2020-04-30,1.084975,1.093695,...,1.0,3.0,0.0,0.0,4.0,20.0,0.0,0.0,0.0,0.0
2228,2020.05.01,00:00,1.0953,1.10178,1.09345,1.09792,86555,2020-05-01,1.08724,1.093695,...,1.0,3.0,0.0,3.0,5.0,0.0,0.0,0.0,0.0,0.0
2229,2020.05.04,00:00,1.09658,1.09737,1.08957,1.09053,96028,2020-05-04,1.08724,1.093535,...,1.0,3.0,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0
2230,2020.05.05,00:00,1.09044,1.09258,1.08259,1.08355,98092,2020-05-05,1.08724,1.089005,...,1.0,3.0,0.0,0.0,7.0,2.0,0.0,0.0,0.0,0.0
2231,2020.05.06,00:00,1.08359,1.08452,1.0782,1.07931,93308,2020-05-06,1.08724,1.08829,...,1.0,3.0,0.0,0.0,8.0,3.0,0.0,0.0,0.0,0.0
2232,2020.05.07,00:00,1.07931,1.08339,1.07665,1.08326,106121,2020-05-07,1.089215,1.08724,...,1.0,3.0,0.0,0.0,9.0,4.0,0.0,0.0,0.0,0.0
2233,2020.05.08,00:00,1.08314,1.08754,1.08151,1.08372,82827,2020-05-08,1.089215,1.08724,...,1.0,3.0,0.0,0.0,10.0,5.0,0.0,0.0,0.0,0.0


In [59]:
# Plot data_with_ichi with the Ichimoku and RSI indicators and cross markers enabled. The date bounds are
# wider than the 2011-2020 rows visible in the outputs, so presumably the whole data set is drawn.
show_data_from_range(data_with_ichi, '2007-10-10', '2022-11-15', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'],visualize_crosses=True)

In [138]:
# Same plot restricted to a short window (2012-01-10 to 2012-02-05) for inspecting individual crosses.
show_data_from_range(data_with_ichi, '2012-01-10', '2012-02-05', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'],visualize_crosses=True)

In [60]:
# Same plot for the 2012-07-20 to 2012-10-02 window.
show_data_from_range(data_with_ichi, '2012-07-20', '2012-10-02', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'],visualize_crosses=True)

In [27]:
# Persist the feature-augmented frame; the call returns the path of the written CSV (see output below).
# NOTE(review): 'hello' looks like a placeholder file name - consider something descriptive.
gi.save_data_with_indicators(data_with_ichi,filename='hello')

WindowsPath('C:/GitHub Repos/ForexMachine/Data/DataWithIndicators/hello.csv')

# load data from mt5 terminal

In [14]:
# Fetch EURUSD D1 data for 2012-01-02 .. 2020-06-06 through the MT5 helper. Per the printed message an
# already-saved CSV is reused instead of downloading again; `filepath` presumably holds that CSV's path.
filepath = gi.download_mt5_data("EURUSD", 'D1', '2012-01-02', '2020-06-06')

EURUSD D1 tick data already saved at C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_D1_ticks_2012-01-02T00;00UTC_to_2020-06-06T00;00UTC.csv


In [15]:
# Earlier variant that reused `filepath` from the download cell (kept for reference):
# data_with_indicators_2 = gi.add_indicators_to_raw(filepath=filepath, save_to_disk=True, 
#                                                   config=config,
#                                                   has_headers=True)
# NOTE(review): the path below is a hard-coded absolute Windows path. It is the same file that the
# download cell reports in its output, so passing `filepath` (plus datetime_col='datetime') would
# presumably work and keep the notebook portable - confirm before switching.
data_with_indicators_2 = gi.add_indicators_to_raw(filepath='C:\\GitHub Repos\\ForexMachine\\Data\\RawData\\mt5_EURUSD_D1_ticks_2012-01-02T00;00UTC_to_2020-06-06T00;00UTC.csv', 
                                                  save_to_disk=True, 
                                                  config=config, 
                                                  has_headers=True,
                                                  datetime_col='datetime')

# Derive the signal feature columns for the MT5-sourced data and preview the last rows.
data_with_ichi_2 = add_features(data_with_indicators_2)
data_with_ichi_2.tail(10)

Unnamed: 0,datetime,Open,High,Low,Close,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_visual_ichimoku_a,...,chikou_cross_most_recent_bull_strength,chikou_cross_most_recent_bear_strength,chikou_cross_bull_strength,chikou_cross_bear_strength,chikou_cross_ticks_since_bull,chikou_cross_ticks_since_bear,chikou_cross_most_recent_bull_length,chikou_cross_most_recent_bear_length,chikou_cross_bull_length,chikou_cross_bear_length
2178,2020-05-25 00:03:00,1.08953,1.09144,1.08706,1.08956,1.08915,1.087245,1.088197,1.093595,1.09115,...,2.0,2.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0
2179,2020-05-26 00:00:00,1.08922,1.09958,1.08919,1.09818,1.08915,1.087245,1.088197,1.093595,1.091862,...,2.0,2.0,0.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0
2180,2020-05-27 00:00:00,1.09814,1.10306,1.09339,1.10026,1.09097,1.087875,1.089422,1.091225,1.090677,...,2.0,2.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0
2181,2020-05-28 00:00:00,1.09962,1.10934,1.09917,1.10757,1.09466,1.091015,1.092837,1.089125,1.08941,...,2.0,2.0,0.0,0.0,5.0,6.0,0.0,0.0,0.0,0.0
2182,2020-05-29 00:00:00,1.10756,1.1145,1.10669,1.11076,1.10078,1.093595,1.097187,1.089125,1.08823,...,2.0,2.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0
2183,2020-06-01 00:02:00,1.1115,1.11536,1.11003,1.11343,1.10121,1.096,1.098605,1.089455,1.087503,...,2.0,2.0,0.0,0.0,7.0,8.0,0.0,0.0,0.0,0.0
2184,2020-06-02 00:00:00,1.11342,1.11957,1.11151,1.11685,1.103315,1.098105,1.10071,1.09156,1.087503,...,2.0,2.0,0.0,0.0,8.0,9.0,0.0,0.0,0.0,0.0
2185,2020-06-03 00:00:00,1.11678,1.12576,1.11665,1.12333,1.10641,1.1012,1.103805,1.09901,1.08769,...,2.0,2.0,0.0,0.0,9.0,10.0,0.0,0.0,0.0,0.0
2186,2020-06-04 00:00:00,1.12321,1.13621,1.11947,1.13365,1.111635,1.106425,1.10903,1.10445,1.087432,...,2.0,2.0,0.0,0.0,10.0,11.0,0.0,0.0,0.0,0.0
2187,2020-06-05 00:00:00,1.13367,1.13837,1.12782,1.12874,1.11378,1.107505,1.110642,1.10553,1.08933,...,2.0,2.0,0.0,0.0,11.0,12.0,0.0,0.0,0.0,0.0


In [288]:
# Plot the MT5-sourced frame for 2014-2017; the next cell draws the same window from the CSV-sourced
# frame, so the two data sources can be compared.
show_data_from_range(data_with_ichi_2, '2014-01-01', '2017-01-01', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'],visualize_crosses=True)

In [13]:
# Same 2014-2017 window, drawn from the frame built from the EURUSDi1440.csv file.
show_data_from_range(data_with_ichi, '2014-01-01', '2017-01-01', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'],visualize_crosses=True)

In [342]:
# Save the MT5-sourced feature frame and keep the returned path.
# NOTE: this rebinds `filepath`, which the download cell used for the raw MT5 CSV; the backtrader feeds
# further down read whatever `filepath` points to at that time, i.e. this saved file when cells are run
# in notebook order.
filepath = gi.save_data_with_indicators(data_with_ichi_2,filename='hello2')
str(filepath)

'C:\\GitHub Repos\\ForexMachine\\Data\\DataWithIndicators\\hello2.csv'

# backtest tutorial

### high level process of using backtrader:

```python
class MyStrategy(bt.Strategy):
    def next(self):
        pass

# Instantiate Cerebro engine
cerebro = bt.Cerebro()

# add strategy to cerebro
cerebro.addstrategy(MyStrategy)

# run cerebro engine
cerebro.run()
```

In [211]:
import backtrader as bt

In [328]:
class PrintClose(bt.Strategy):
    """Minimal strategy: prints the close of the first data feed on every bar."""

    def __init__(self):
        # cache the "close" line of the primary feed (self.datas[0])
        primary_feed = self.datas[0]
        self.dataclose = primary_feed.close

    def log(self, txt, dt=None):
        """Print txt prefixed with dt in ISO format; a falsy dt means the current bar's date."""
        if not dt:
            dt = self.datas[0].datetime.date(0)
        print(f'{dt.isoformat()}, {txt}')

    def next(self):
        """Called once per bar: report the current closing price."""
        self.log(f'Close: {self.dataclose[0]}')

class MAcrossover(bt.Strategy):
    """
    Simple moving-average crossover strategy on the first data feed.

    Opens a long position when the fast SMA moves from below to above the slow SMA, a short position
    on the opposite move, and closes any open position 5 bars after the bar on which the last order
    completed.
    """

    # Moving average parameters (periods of the fast and slow simple moving averages)
    params = (('pfast', 20), ('pslow', 50))

    def __init__(self):
        primary_feed = self.datas[0]
        self.dataclose = primary_feed.close

        # Pending order, if any; None means a new order may be sent
        self.order = None

        # Moving averages computed on the primary feed
        self.slow_sma = bt.indicators.MovingAverageSimple(primary_feed,
                                                          period=self.params.pslow)
        self.fast_sma = bt.indicators.MovingAverageSimple(primary_feed,
                                                          period=self.params.pfast)

    def log(self, txt, dt=None):
        """Print txt prefixed with dt in ISO format; a falsy dt means the current bar's date."""
        if not dt:
            dt = self.datas[0].datetime.date(0)
        print(f'{dt.isoformat()} {txt}') # Comment this line when running optimization

    def notify_order(self, order):
        """Report order status changes and clear the pending-order marker once the order is final."""
        status = order.status

        # Order merely submitted/accepted - still pending, nothing to report yet
        if status in (order.Submitted, order.Accepted):
            return

        # Attention: broker could reject order if not enough cash
        if status == order.Completed:
            if order.isbuy():
                self.log(f'BUY EXECUTED, size: {order.executed.size}, price: {order.executed.price}, cost: {order.executed.value}, commision: {order.executed.comm}')
                print(f'current balance: {self.broker.getvalue()}')
            elif order.issell():
                self.log(f'SELL EXECUTED, size: {order.executed.size}, price: {order.executed.price}, cost: {order.executed.value}, commision: {order.executed.comm}')
                print(f'current balance: {self.broker.getvalue()}')
            # remember the bar of the execution; next() uses it to time the exit
            self.bar_executed = len(self)
        elif status in (order.Canceled, order.Margin, order.Rejected):
            self.log('Order Canceled/Margin/Rejected')

        # No order is pending any more
        self.order = None

    def notify_trade(self, trade):
        """Print a profit summary once a trade has been closed."""
        if trade.isclosed:
            print()
            self.log(f'OPERATION PROFIT, GROSS: {trade.pnl}, NET: {trade.pnlcomm}')
            print(f'open dt: {trade.open_datetime()} close dt: {trade.close_datetime()}')
            print(f'close price: {trade.price}')
            print(f'bar opened: {trade.baropen}, bar closed: {trade.barclose}')
            print(f'number of bars trade was open for: {trade.barlen}')
            print(f'current balance: {self.broker.getvalue()}')
            print()

    def next(self):
        """Per-bar decision: wait on a pending order, manage the exit, or look for an entry."""
        # An order is pending ... we cannot send a 2nd one
        if self.order:
            return

        # Already in the market: only look for the signal to CLOSE the trade
        if self.position:
            if len(self) >= (self.bar_executed + 5):
                self.log(f'CLOSE CREATE {self.dataclose[0]}')
                self.order = self.close()
            return

        # Not in the market: look for a signal to OPEN a trade
        fast, slow = self.fast_sma, self.slow_sma

        # Fast SMA is above the slow SMA now and was below it on the previous bar
        if fast[0] > slow[0] and fast[-1] < slow[-1]:
            self.log(f'BUY CREATE {self.dataclose[0]}')
            # Keep track of the created order to avoid a 2nd order
            self.order = self.buy()
        # Fast SMA is below the slow SMA now and was above it on the previous bar
        elif fast[0] < slow[0] and fast[-1] > slow[-1]:
            self.log(f'SELL CREATE {self.dataclose[0]}')
            # Keep track of the created order to avoid a 2nd order
            self.order = self.sell()

In [329]:
# Instantiate Cerebro engine
cerebro = bt.Cerebro()

# Split the time span covered by the frame (first column of the first/last row) into two equal halves
first_dt = data_with_ichi_2.iloc[0,0]
last_dt = data_with_ichi_2.iloc[-1,0]
total_time_delta = last_dt - first_dt
midpoint_dt = first_dt + total_time_delta / 2
from1, to1 = first_dt, midpoint_dt
from2, to2 = midpoint_dt, last_dt

# Options shared by both feeds: same CSV file, same column positions, same timeframe
shared_feed_kwargs = dict(
    dataname=str(filepath),
    datetime=0,
    open=1,
    high=2,
    low=3,
    close=4,
    volume=-1,
    openinterest=-1,
    timeframe=bt.TimeFrame.Minutes,
)

# first half of the data
forex_data1 = bt.feeds.GenericCSVData(
    fromdate=from1.to_pydatetime(),
    todate=to1.to_pydatetime(),
    **shared_feed_kwargs
)

# second half of the data (built here but not added to cerebro in this cell)
forex_data2 = bt.feeds.GenericCSVData(
    fromdate=from2.to_pydatetime(),
    todate=to2.to_pydatetime(),
    **shared_feed_kwargs
)

# add data to cerebro to read over
cerebro.adddata(forex_data1)

# add strategy to cerebro
cerebro.addstrategy(MAcrossover)

# Default position size
cerebro.addsizer(bt.sizers.SizerFix, stake=5000)

In [330]:
# Record the broker value before and after the backtest run and report the difference
start_balance = cerebro.broker.getvalue()
cerebro.run()
end_balance = cerebro.broker.getvalue()
pnl = end_balance - start_balance

print(f'Starting Portfolio Value: {start_balance}',
      f'Final Portfolio Value: {end_balance}',
      f'PnL: {pnl}',
      sep='\n')

2012-04-05 SELL CREATE 1.30635
2012-04-06 SELL EXECUTED, size: -5000, price: 1.30634, cost: -6531.700000000001, commision: 0.0
current balance: 9986.150000000001
2012-04-13 CLOSE CREATE 1.30764
2012-04-16 BUY EXECUTED, size: 5000, price: 1.3069, cost: -6531.700000000001, commision: 0.0
current balance: 9997.2

2012-04-16 OPERATION PROFIT, GROSS: -2.7999999999994696, NET: -2.7999999999994696
open dt: 2012-04-06 00:00:00 close dt: 2012-04-16 00:00:00
close price: 1.30634
bar opened: 70, bar closed: 76
number of bars trade was open for: 6
current balance: 9997.2

2012-08-28 BUY CREATE 1.25635
2012-08-29 BUY EXECUTED, size: 5000, price: 1.25633, cost: 6281.65, commision: 0.0
current balance: 9980.800000000001
2012-09-05 CLOSE CREATE 1.26021
2012-09-06 SELL EXECUTED, size: -5000, price: 1.2602, cost: 6281.65, commision: 0.0
current balance: 10016.550000000001

2012-09-06 OPERATION PROFIT, GROSS: 19.3500000000002, NET: 19.3500000000002
open dt: 2012-08-29 00:00:00 close dt: 2012-09-06 00:00:

### Generating Labels

In [594]:
def generate_ichimoku_labels(df, min_profit_percent=0.001, units_per_trade=100000, profit_noise_percent=0.0006):
    """Label each Ichimoku signal row of ``df`` with the best decisions that followed it.

    For every row on which at least one column of ``signals_to_consider`` equals 0,
    a trade is simulated that opens at the ``Open`` price of the *next* row. Both
    the buy profit and the sell profit (its negation) of that trade are tracked on
    every following row, using ``Close`` prices:

    * the **first decision** spans from the trade open until the close price
      crosses the trade's open price (after profit has exceeded the noise
      threshold at least once);
    * the **second decision** spans from that first cross until the close price
      crosses the open price a second time, which closes the trade.

    Each decision yields three labels: the decision (``'buy'``, ``'sell'`` or
    ``'wait'``), the number of ticks from the trade's opening tick to the tick of
    best profit (plus 1, so that 0 is reserved for ``'wait'``), and that best
    profit. A decision whose best profit is below the minimum profit becomes
    ``['wait', 0, 0]``. Trades still open when the data ends are labelled with
    the best profits seen so far.

    Args:
        df: DataFrame containing at least the columns ``Open``, ``Close``,
            ``datetime`` and every column listed in ``signals_to_consider``.
        min_profit_percent: Fraction of ``units_per_trade``; best profits below
            ``min_profit_percent * units_per_trade`` are labelled ``'wait'``.
        units_per_trade: Position size used to turn price differences into profit.
        profit_noise_percent: Fraction of ``units_per_trade``; crosses of the
            open price are only looked for once the absolute profit has reached
            ``profit_noise_percent * units_per_trade``.

    Returns:
        A DataFrame with one row per row of ``df`` (default integer index) and
        the columns ``first_decision``, ``ticks_till_best_profit_first_decision``,
        ``best_profit_first_decision``, ``second_decision``,
        ``ticks_till_best_profit_second_decision``, ``best_profit_second_decision``
        and ``causes`` (comma-joined names of the signals that fired). Rows
        without a signal are all ``None``. If no row range free of missing data
        exists, every row is ``None``. Returns ``None`` (after printing a
        message) if the number of labels per decision is inconsistent.
    """
    min_profit = min_profit_percent * units_per_trade
    profit_noise = profit_noise_percent * units_per_trade
    data = df.values
    feature_indices = {df.columns[i]: i for i in range(len(df.columns))}

    # if any of these columns are equal to 0 then the corresponding signal has occurred at that tick
    signals_to_consider = ['ticks_since_cloud_breakout_bull','ticks_since_cloud_breakout_bear',   # cloud breakout
                           'tk_cross_ticks_since_bull', 'tk_cross_ticks_since_bear',              # Tenkan Sen / Kijun Sen Cross
                           'tk_price_cross_ticks_since_bull', 'tk_price_cross_ticks_since_bear',  # price crossing both the Tenkan Sen / Kijun Sen
                           'senkou_cross_ticks_since_bull', 'senkou_cross_ticks_since_bear',      # Senkou Span Cross
                           'chikou_cross_ticks_since_bull', 'chikou_cross_ticks_since_bear']      # Chikou Span Cross

    # first find the index at which data becomes consistent (no missing data from there on);
    # 'chikou_span' is ignored when looking for missing values
    start_idx = None
    excluded_features = {'chikou_span'}
    checked_columns = [feature_indices[feature] for feature in feature_indices
                       if feature not in excluded_features]
    for i, row in enumerate(data):
        if any(pd.isnull(row[feature_i]) for feature_i in checked_columns):
            # a gap invalidates any earlier candidate start
            start_idx = None
        elif start_idx is None:
            # compare against None explicitly: index 0 is a valid start but is falsy
            start_idx = i

    if start_idx is None:
        # no gap-free tail exists (or df is empty): simulate nothing, every row gets empty labels
        start_idx = len(data)

    def get_decision_label(trade, first):
        """Return [decision, ticks_till_best_profit, best_profit] for one decision of a trade."""
        decision = 'first' if first else 'second'
        best_buy = trade[f'{decision}_decision_best_buy_profit']
        best_sell = trade[f'{decision}_decision_best_sell_profit']
        label = None
        if best_buy[0] > best_sell[0]:
            # add 1 to ticks_till_peak to reserve 0 ticks for 'wait' labels
            ticks_till_peak = best_buy[1] - trade['trade_open_tick_i'] + 1
            label = ['buy', ticks_till_peak, best_buy[0]]
        elif best_buy[0] < best_sell[0]:
            ticks_till_peak = best_sell[1] - trade['trade_open_tick_i'] + 1
            label = ['sell', ticks_till_peak, best_sell[0]]

        if label and label[2] < min_profit:
            label = ['wait', 0, 0]

        if best_buy[0] == best_sell[0]:
            print(f'{decision} decision best buy and sell profit equal (lol how?), trade: {trade}')
            label = ['wait', 0, 0]
        return label

    def crossed_open_price(last_close_price, close_price, trade_open_price):
        """True if the close moved from one side of the trade's open price to (or past) it."""
        return ((last_close_price < trade_open_price and close_price >= trade_open_price)
                or (last_close_price > trade_open_price and close_price <= trade_open_price))

    # now simulate hedged trades to determine labels
    # labels_dict maps the signal row index to its labels: 3 labels per decision
    # ((1) decision (buy, sell, wait) (str), (2) ticks till best profit (int), (3) the profit (float))
    # for each of the two decisions, plus the causes
    labels_dict = {}
    trades = {}            # open trades, keyed by the index of the row whose signal caused them
    pending_order = None   # (signal row index, causes) waiting to be opened on the next row
    for i, row in enumerate(data[start_idx:], start=start_idx):

        if pending_order is not None:
            # the signal fired on the previous row; the trade opens at this row's Open price
            pending_order_i, causes = pending_order
            signal_price = data[pending_order_i][feature_indices['Close']]
            open_price = data[i][feature_indices['Open']]
            trades[pending_order_i] = {
                'signal_price': signal_price,
                'signal_datetime': data[pending_order_i][feature_indices['datetime']],
                'open_price': open_price,
                'trade_open_tick_i': i,
                'causes': causes,
                'consider_profit': False,
                # best-profit entries are tuples: (profit, row index of that profit, debug note)
                'first_decision_best_buy_profit': None,
                'first_decision_best_sell_profit': None,
                'second_decision_best_buy_profit': None,
                'second_decision_best_sell_profit': None,
                'first_decision_done': False,
                'second_decision_done': False,
                'first_decision_done_tick_dt': None,
                'second_decision_done_tick_dt': None
            }
            pending_order = None

        closed_trades = []
        for trade_i in trades:
            trade = trades[trade_i]
            trade_open_price = trade['open_price']
            close_price = row[feature_indices['Close']]
            # on the trade's first tick there is no previous close yet, so use the open price
            last_close_price = data[i-1][feature_indices['Close']] if i-1 != trade_i else trade_open_price
            buy_profit = (close_price - trade_open_price) * units_per_trade
            sell_profit = buy_profit * -1
            debug_note = f'debug notes: ({close_price} - {trade_open_price})*{units_per_trade}'

            if abs(buy_profit) >= profit_noise:
                trade['consider_profit'] = True

            if not trade['first_decision_done']:
                if not trade['first_decision_best_buy_profit'] or trade['first_decision_best_buy_profit'][0] < buy_profit:
                    trade['first_decision_best_buy_profit'] = (buy_profit, i, debug_note)

                if not trade['first_decision_best_sell_profit'] or trade['first_decision_best_sell_profit'][0] < sell_profit:
                    trade['first_decision_best_sell_profit'] = (sell_profit, i, debug_note)

                # test for end of 1st decision: see if current close price crossed the initial price at which the trade was opened
                # note: only look for crosses after profit has exceeded profit_noise (small amounts of profit w/ respect to units_per_trade)
                if trade['consider_profit'] and crossed_open_price(last_close_price, close_price, trade_open_price):
                    label = get_decision_label(trade, first=True)
                    trade['first_decision_done'] = True
                    trade['first_decision_done_tick_dt'] = data[i][feature_indices['datetime']]
                    labels_dict[trade_i] = {'first_decision': label}

                    # profit must exceed the noise threshold again before the 2nd cross is looked for
                    trade['consider_profit'] = False

                    # at this point trade['second_decision_best_buy_profit'] should be None:
                    # seed the 2nd decision with the profits at the crossing tick
                    trade['second_decision_best_buy_profit'] = (buy_profit, i, debug_note)
                    trade['second_decision_best_sell_profit'] = (sell_profit, i, debug_note)

            elif not trade['second_decision_done']:
                if trade['second_decision_best_buy_profit'][0] < buy_profit:
                    trade['second_decision_best_buy_profit'] = (buy_profit, i, debug_note)

                if trade['second_decision_best_sell_profit'][0] < sell_profit:
                    trade['second_decision_best_sell_profit'] = (sell_profit, i, debug_note)

                # test for end of 2nd decision: see if current close price crossed the initial price at which the trade was opened again
                # note: only look for crosses after profit has exceeded profit_noise (small amounts of profit w/ respect to units_per_trade)
                if trade['consider_profit'] and crossed_open_price(last_close_price, close_price, trade_open_price):
                    label = get_decision_label(trade, first=False)
                    trade['second_decision_done'] = True
                    trade['second_decision_done_tick_dt'] = data[i][feature_indices['datetime']]
                    labels_dict[trade_i]['second_decision'] = label

                    labels_dict[trade_i]['causes'] = ','.join(trade['causes'])
                    closed_trades.append(trade_i)

        # remove finished trades only after iterating, so the dict is not mutated mid-loop
        for trade_i in closed_trades:
            del trades[trade_i]

        # collect every signal that fired on this row; any signal queues a trade for the next row
        causes = []
        for sig in signals_to_consider:
            sig_i = feature_indices[sig]
            if row[sig_i] == 0:
                causes.append(sig)

        if len(causes) > 0:
            pending_order = (i, causes)

    # trades still open when the data ends: label whichever decision is unfinished
    # with the best profits seen so far
    for trade_i in trades:
        trade = trades[trade_i]

        if not trade['first_decision_done']:
            label = get_decision_label(trade, first=True)
            labels_dict[trade_i] = {'first_decision': label}

        elif not trade['second_decision_done']:
            label = get_decision_label(trade, first=False)
            labels_dict[trade_i]['second_decision'] = label

        labels_dict[trade_i]['causes'] = ','.join(trade['causes'])

    # sanity check: every decision must carry the same number of labels
    first_decision_labels_count = None
    second_decision_labels_count = None
    for i in labels_dict:
        entry = labels_dict[i]
        if 'first_decision' in entry:
            if not first_decision_labels_count:
                first_decision_labels_count = len(entry['first_decision'])
            elif first_decision_labels_count != len(entry['first_decision']):
                print(f'number of 1st decision labels are not equal for each row (row {i}: {entry["first_decision"]}, ' 
                      f'changed from {first_decision_labels_count} to {len(entry["first_decision"])},)!')
                return None
        if 'second_decision' in entry:
            if not second_decision_labels_count:
                second_decision_labels_count = len(entry['second_decision'])
            elif second_decision_labels_count != len(entry['second_decision']):
                print(f'number of 2nd decision labels are not equal for each row (row {i}: {entry["second_decision"]}, ' 
                      f'changed from {second_decision_labels_count} to {len(entry["second_decision"])})!')
                return None

    # when no decision of a kind was ever produced, fall back to the size of a
    # label as built by get_decision_label: [decision, ticks, profit]
    labels_per_decision = 3
    if first_decision_labels_count is None:
        first_decision_labels_count = labels_per_decision
    if second_decision_labels_count is None:
        second_decision_labels_count = labels_per_decision

    labels = []
    for i in range(len(data)):
        if i in labels_dict:
            entry = labels_dict[i]
            causes_label = entry['causes']

            first_decision_labels = [None] * first_decision_labels_count
            second_decision_labels = [None] * second_decision_labels_count
            if 'first_decision' in entry:
                first_decision_labels = entry['first_decision']
            if 'second_decision' in entry:
                second_decision_labels = entry['second_decision']

            label = [*first_decision_labels, *second_decision_labels, causes_label]

            labels.append(label)
        else:
            labels.append([None] * (first_decision_labels_count + second_decision_labels_count + 1)) # +1 for causes label

    label_names = ['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision',
                   'second_decision','ticks_till_best_profit_second_decision', 'best_profit_second_decision',
                   'causes']

    labels_df = pd.DataFrame(labels, columns=label_names)

    return labels_df

In [595]:
# Generate labels with the default thresholds and display the resulting DataFrame.
generate_ichimoku_labels(data_with_ichi_2)

Unnamed: 0,first_decision,ticks_till_best_profit_first_decision,best_profit_first_decision,second_decision,ticks_till_best_profit_second_decision,best_profit_second_decision,causes
0,,,,,,,
1,,,,,,,
2,,,,,,,
3,,,,,,,
4,,,,,,,
...,...,...,...,...,...,...,...
2183,,,,,,,
2184,,,,,,,
2185,,,,,,,
2186,,,,,,,


In [596]:
def compare_labels_dataframes(df1, df2, print_each_line=True, only_show_diff_rows=True):
    """Print a row-by-row comparison of two label DataFrames.

    Rows are paired by position. Each row is compared as the tuple produced by
    ``DataFrame.itertuples()``, so position 0 is the index value and the reported
    column indices are offset by one relative to the DataFrame columns. Two
    values are considered equal when both are null, when both are floats that
    agree after rounding to 3 decimals, or when they compare equal with ``==``.
    If the rows have a different number of values, the positions that exist in
    only one of them are reported as different.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        print_each_line: If True, print the details of compared rows.
        only_show_diff_rows: If True (and ``print_each_line`` is True), only
            rows that differ are printed; otherwise every row is printed.

    Returns:
        None. The number of differing rows is printed at the end. If the frames
        have a different number of rows a warning is printed and only the rows
        present in both frames are compared.
    """
    data1 = tuple(df1.itertuples())
    data2 = tuple(df2.itertuples())

    if len(data1) != len(data2):
        print('dataframes do not have equal number of rows')

    diff_rows = 0
    for row1, row2 in zip(data1, data2):
        row1 = tuple(row1)
        row2 = tuple(row2)
        shared_width = min(len(row1), len(row2))
        diff_cols = []

        for i in range(shared_width):
            value1, value2 = row1[i], row2[i]
            if pd.isnull(value1) and pd.isnull(value2):
                continue
            if isinstance(value1, float) and isinstance(value2, float):
                # tolerate float noise beyond the 3rd decimal
                if round(value1, 3) != round(value2, 3):
                    diff_cols.append(i)
            elif value1 != value2:
                diff_cols.append(i)

        # positions present in only one row cannot match; report them instead of indexing past the shorter row
        diff_cols.extend(range(shared_width, max(len(row1), len(row2))))

        all_equal = not diff_cols
        if not all_equal:
            diff_rows += 1

        if print_each_line and (not only_show_diff_rows or not all_equal):
            print(f'df1 row: {row1}')
            print(f'df2 row: {row2}')
            print(f'df1 row types: {[type(x) for x in row1]}')
            print(f'df2 row types: {[type(x) for x in row2]}')
            print(f'same rows: {all_equal}')
            print(f'different column indices: {diff_cols}\n')

    print(f'number of diff rows: {diff_rows}')

In [597]:
# Labels with the default thresholds (min_profit_percent=0.001, profit_noise_percent=0.0006).
d1 = generate_ichimoku_labels(data_with_ichi_2)
# d1.to_csv('./test1.csv')
# Labels with a 10x lower minimum profit and the noise threshold disabled.
d2 = generate_ichimoku_labels(data_with_ichi_2, min_profit_percent=0.0001, profit_noise_percent=0)
# d2.to_csv('./test2.csv')
# Print only the rows whose labels differ between the two parameterisations.
compare_labels_dataframes(d1, d2)

df1 row: (292, 'buy', 3.0, 275.000000000003, 'sell', 4.0, 697.9999999999986, 'tk_price_cross_ticks_since_bear')
df2 row: (292, 'sell', 2.0, 56.99999999999594, 'buy', 3.0, 275.000000000003, 'tk_price_cross_ticks_since_bear')
df1 row types: [<class 'int'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>]
df2 row types: [<class 'int'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>]
same rows: False
different column indices: [1, 2, 3, 4, 5, 6]

df1 row: (355, 'sell', 3.0, 950.0000000000064, 'sell', 10.0, 790.9999999999861, 'tk_cross_ticks_since_bear')
df2 row: (355, 'sell', 3.0, 950.0000000000064, 'sell', 8.0, 35.00000000000725, 'tk_cross_ticks_since_bear')
df1 row types: [<class 'int'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>, <class 'float'>, <class 'float'>, <class 'str'>]
df2 row types: [<class 'int'>, <class 'str'>, <class 'float'>, <

In [575]:
# Inspect the 2016-01-11 .. 2016-03-12 window with Ichimoku as the main indicator,
# RSI as the sub indicator and cross visualisation enabled.
show_data_from_range(data_with_ichi, '2016-01-11', '2016-03-12', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'], visualize_crosses=True)

In [591]:
# Inspect the 2012-10-29 .. 2012-12-29 window with Ichimoku as the main indicator,
# RSI as the sub indicator and cross visualisation enabled.
show_data_from_range(data_with_ichi, '2012-10-29', '2012-12-29', 
                     main_indicators=['ichimoku'], sub_indicators=['rsi'], visualize_crosses=True)

In [None]:
"""
if self.data[i][self.feature_indices['datetime']].strftime('%Y-%m-%dT%H:%M') == '2013-05-28T10:00':
    print('yo')
"""

"""
Notes:

- Function Idea:  When computing the output for a given test input, in other words when deciding if it was best
                to buy or sell at this point, create a function that takes a "max_loss" kinda arg, so if you
                lose more than max_loss...
                
- As a starting point for training data, only consider points from data_with_indicators when the price breaks out 
  below or above the cloud due to this strat: https://www.tradeciety.com/the-complete-ichimoku-trading-guide-how-to-use-the-ichimoku-indicator/

- When actually training, make sure to try out several combinations of different features for feature selection. For
  example, see if the "most_recently_" and "ticks_since_" type features for crosses work better than the respective 
  feature that will be non-zero only on the exact tick that a cross happens
"""

"""
To-do:

1) add function to generate labels (buy, sell, or do nothing) for ichimoku training data generated so far
"""