In [1]:
# external packages
from pathlib import Path
import matplotlib.pyplot as plt 
from importlib import reload
import matplotlib
%matplotlib qt
# %matplotlib inline
import numpy as np
matplotlib.style.use('default')
from datetime import datetime
from datetime import timedelta
import pandas as pd
from collections import namedtuple
from collections import deque
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import ParameterGrid
import random
import time

In [2]:
# local modules and packages
from ForexMachine.Preprocessing import get_indicators as gi
from ForexMachine import util
reload(gi)
reload(util)

<module 'ForexMachine.util' from 'c:\\github repos\\forexmachine\\ForexMachine\\util.py'>

# trying loading and adding indicators to raw data w/ ForexMachine package

In [None]:
# convert config to dictionary
config = util.yaml_to_dict()
current_model = config['current_model']
indicators = config[current_model]['indicators']
print(indicators)
# Read in data with indicators
data_with_indicators = gi.add_indicators_to_raw(filepath='../Data/RawData/EURUSDi1440.csv', save_to_disk=True, 
                                                config=config)
data_with_indicators.head(55)

# define helper plotting functions

In [3]:
def get_index_range(datetime1, datetime2, datetimes):
    i1 = -1
    i2 = -1
    if datetime1 <= datetime2:
        for i in range(len(datetimes)):
            i1 = i
            if datetimes[i] == datetime1:
                break
            if datetimes[i] > datetime1:
                i1 = i-1 if i-1 >= 0 else 0
                break
        for i in range(i1, len(datetimes)):
            i2 = i
            if datetimes[i] == datetime2:
                break
            if datetimes[i] > datetime2:
                i2 = i-1 if i-1 >= 0 else 0
                break
    return i1, i2

# date format 'yyyy.mm.dd'
def show_data_from_range(df, date1, date2, main_indicator, sub_indicators = [], visualize_crosses=False, crosses=None,
                         visualize_labels=False, labels_df=None, labels=None):
    
    start, stop = get_index_range(pd.Timestamp.fromisoformat(date1), pd.Timestamp.fromisoformat(date2), df['datetime'].to_numpy())
    if start < 0 or stop < 0:
        print(f'invalid dates (start i = {start}, stop i = {stop})')
        return
    
    data_range = df.iloc[start:stop+1]
    chart_count = len(sub_indicators) + 1
    
    top_chart_ratio = 1
    sub_chart_ratio = 0
    if chart_count == 2:
        top_chart_ratio = 3
        sub_chart_ratio = 2 / (chart_count-1)
    if chart_count > 2:
        top_chart_ratio = 1
        sub_chart_ratio = 1 / (chart_count-1)
    height_ratios = [top_chart_ratio]
    height_ratios.extend([sub_chart_ratio]*(chart_count-1))
    fig, axes = plt.subplots(chart_count,1,sharex='col', gridspec_kw={'height_ratios':height_ratios})
    fig.tight_layout(pad=1.8, h_pad=0.0)
    
    top_ax = None
    bottom_ax = None
    if chart_count > 1:
        top_ax = axes[0]
        bottom_ax = axes[len(axes)-1]
    else:
        bottom_ax = top_ax = axes
    top_ax.plot(data_range.Close.to_list(), label='Close',color='brown')
    
    plot_indicator_funcs = {
        'ichimoku': lambda ax, dataf: add_ichimoku_to_plot(ax, dataf, visualize_crosses, crosses),
        'rsi': lambda ax, dataf: add_rsi_to_plot(ax, dataf),
        'extra': lambda ax, extra_df, plot_range: add_extra_data_to_plot(ax, extra_df, plot_range)
    }
    
    plot_indicator_funcs[main_indicator](top_ax, data_range)
    
    for i in range(len(sub_indicators)):
        item = sub_indicators[i]
        if isinstance(item, str):
            plot_indicator_funcs[sub_indicators[i]](axes[i+1], data_range)
        elif isinstance(item, pd.DataFrame):
            plot_indicator_funcs['extra'](axes[i+1], item, (start, stop))
        
    if visualize_labels and labels_df is not None:
        add_labels_to_plot(top_ax, df, labels_df, (start, stop), labels)

    bottom_ax.set_xticks(np.arange(len(data_range)))
    x_labels = [dt.strftime('%Y-%m-%d %H:%M') * ((i+1)%2) for i,dt in enumerate(data_range['datetime'])]
    bottom_ax.set_xticklabels(x_labels,rotation=80, wrap=True)
    
    if chart_count > 1:
        for ax in axes:
            ax.legend()
    else:
        top_ax.legend()
        
    plt.show()

    
"""
Functions for adding indicators to a matplotlib chart
"""

def add_ichimoku_to_plot(ax, df, visualize_crosses = False, crosses=None):
    ax.plot(df.trend_visual_ichimoku_a.to_list(), label='Senkou-Span a',linestyle='--',color='green')
    ax.plot(df.trend_visual_ichimoku_b.to_list(), label='Senkou-Span b',linestyle='--',color='red')
    ax.fill_between(np.arange(len(df)),df.trend_visual_ichimoku_a,
                    df.trend_visual_ichimoku_b,alpha=0.2,color='green',
                    where=(df.trend_visual_ichimoku_a > df.trend_visual_ichimoku_b))
    ax.fill_between(np.arange(len(df)),df.trend_visual_ichimoku_a,
                    df.trend_visual_ichimoku_b,alpha=0.2,color='red',
                    where=(df.trend_visual_ichimoku_a <= df.trend_visual_ichimoku_b))
    ax.plot(df.trend_ichimoku_conv.to_list(), label='Tenkan-Sen (conversion)',color='cyan')
    ax.plot(df.trend_ichimoku_base.to_list(), label='Kijun Sen (base)',color='blue')
    ax.plot(df.chikou_span_visual.to_list(), label='chikou span',linestyle=':',color='orange')
    
    if visualize_crosses:
        colors = {
            'tk_cross': 'hotpink',
            'tk_price_cross': 'brown',
            'senkou_cross': 'blue',
            'chikou_cross': 'orange',
            'kumo_breakout': 'purple'
        }
        
        df_idx = {df.columns[i]: i for i in range(len(df.columns))}
        data = df.to_numpy()
        
        if crosses is None:
            crosses = set(['tk_cross', 'tk_price_cross', 'senkou_cross', 'chikou_cross', 'kumo_breakout'])
        else:
            crosses = set(crosses)
                
        for i in range(len(data)):   
            close = data[i][df_idx['Close']]            
            vert_occupied = False
            filler = ''
            
            # tk cross
            if 'tk_cross' in crosses:
                tk_cross_bull_strength = abs(data[i][df_idx['tk_cross_bull_strength']])
                tk_cross_bear_strength = abs(data[i][df_idx['tk_cross_bear_strength']])
                tk_cross_length_bull = data[i][df_idx['tk_cross_bull_length']]
                tk_cross_length_bear = data[i][df_idx['tk_cross_bear_length']]
            
                if not np.isnan(tk_cross_bull_strength) and tk_cross_bull_strength > 0:
                    ax.axvline(x = i, color = colors['tk_cross'])
                    ax.text(x = i, y = close, color = colors['tk_cross'],
                            s = f'^ TK Cross Bull\nstrength={tk_cross_bull_strength}\nlength={tk_cross_length_bull}')
                    vert_occupied = True

                if not np.isnan(tk_cross_bear_strength) and tk_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_cross'])
                    ax.text(x = i, y = close, color = colors['tk_cross'],
                            s = f'_ TK Cross Bear\nstrength={tk_cross_bear_strength}'
                                f'\nlength={tk_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # tk price cross
            if 'tk_price_cross' in crosses:
                tk_price_cross_bull_strength = abs(data[i][df_idx['tk_price_cross_bull_strength']])
                tk_price_cross_bear_strength = abs(data[i][df_idx['tk_price_cross_bear_strength']])
                tk_price_cross_length_bull = data[i][df_idx['tk_price_cross_bull_length']]
                tk_price_cross_length_bear = data[i][df_idx['tk_price_cross_bear_length']]
                
                if not np.isnan(tk_price_cross_bull_strength) and tk_price_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_price_cross'])
                    ax.text(x = i, y = close, color = colors['tk_price_cross'],
                            s = f'^ TK Price Cross Bull\nstrength={tk_price_cross_bull_strength}'
                                f'\nlength={tk_price_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(tk_price_cross_bear_strength) and tk_price_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_price_cross'])
                    ax.text(x = i, y = close, color = colors['tk_price_cross'],
                            s = f'_ TK Price Cross Bear\nstrength={tk_price_cross_bear_strength}'
                                f'\nlength={tk_price_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # senkou cross
            if 'senkou_cross' in crosses:
                senkou_cross_bull_strength = abs(data[i][df_idx['senkou_cross_bull_strength']])
                senkou_cross_bear_strength = abs(data[i][df_idx['senkou_cross_bear_strength']])
                senkou_cross_length_bull = data[i][df_idx['senkou_cross_bull_length']]
                senkou_cross_length_bear = data[i][df_idx['senkou_cross_bear_length']]
                
                if not np.isnan(senkou_cross_bull_strength) and senkou_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['senkou_cross'])
                    ax.text(x = i, y = close, color = colors['senkou_cross'],
                            s = f'^ Senkou Cross Bull\nstrength={senkou_cross_bull_strength}'
                                f'\nlength={senkou_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(senkou_cross_bear_strength) and senkou_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['senkou_cross'])
                    ax.text(x = i, y = close, color = colors['senkou_cross'],
                            s = f'_ Senkou Cross Bear\nstrength={senkou_cross_bear_strength}'
                                f'\nlength={senkou_cross_length_bear}{filler}')
                    vert_occupied = True
                
            # chikou cross
            if 'chikou_cross' in crosses:
                chikou_cross_bull_strength = abs(data[i][df_idx['chikou_cross_bull_strength']])
                chikou_cross_bear_strength = abs(data[i][df_idx['chikou_cross_bear_strength']])
                chikou_cross_length_bull = data[i][df_idx['chikou_cross_bull_length']]
                chikou_cross_length_bear = data[i][df_idx['chikou_cross_bear_length']]
                
                if not np.isnan(chikou_cross_bull_strength) and chikou_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['chikou_cross'])
                    ax.text(x = i, y = close, color = colors['chikou_cross'],
                            s = f'^ Chikou Cross Bull\nstrength={chikou_cross_bull_strength}'
                                f'\nlength={chikou_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(chikou_cross_bear_strength) and chikou_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['chikou_cross'])
                    ax.text(x = i, y = close, color = colors['chikou_cross'],
                            s = f'_ Chikou Cross Bear\nstrength={chikou_cross_bear_strength}'
                                f'\nlength={chikou_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # kumo breakout
            if 'kumo_breakout' in crosses:
                cloud_breakout_bull = data[i][df_idx['cloud_breakout_bull']]
                cloud_breakout_bear = data[i][df_idx['cloud_breakout_bear']]
                
                if cloud_breakout_bull:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['kumo_breakout'])
                    ax.text(x = i, y = close, color = colors['kumo_breakout'], s = f'^ Kumo Breakout Bullish{filler}')
                    vert_occupied = True

                if cloud_breakout_bear:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['kumo_breakout'])
                    ax.text(x = i, y = close, color = colors['kumo_breakout'], s = f'_ Kumo Breakout Bearish{filler}')
                    vert_occupied = True
        

def add_rsi_to_plot(ax, df):
    ax.plot(df.momentum_rsi.to_list(), label='RSI', color='purple')
    ax.plot([30]*len(df),color='gray',alpha=0.5)
    ax.plot([70]*len(df),color='gray',alpha=0.5)
    ax.fill_between(np.arange(len(df)),[30]*len(df),[70]*len(df),color='gray',alpha=0.2)
    ax.set_ylim(15,85)
    ax.set_yticks(np.arange(20,100,20))

def add_labels_to_plot(ax, all_feat_df, labels_df, plot_range, labels=None):
    if labels is None:
        labels = set(['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
                      'second_decision', 'ticks_till_best_profit_second_decision', 'best_profit_second_decision', 'profit_peak_second_decision',
                      'decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred'])
    else:
        labels = set(labels)
    
    colors = {
        'buy': 'green',
        'sell': 'red',
    }
    
    start, stop = plot_range
    plot_data_len = stop-start+1
    
    feat_data = all_feat_df.to_numpy()
    labels_data = labels_df.to_numpy()
    feat_df_idx = {all_feat_df.columns[i]: i for i in range(len(all_feat_df.columns))}
    labels_df_idx = {labels_df.columns[i]: i for i in range(len(labels_df.columns))}
    
    verts_occupied = {}
    for i in range(plot_data_len):
        labels_i = i + start
        close = feat_data[labels_i][feat_df_idx['Close']]            
        
        # the 1st and 2nd decisions should never occupy the same vert
        printed_causes = False
        for label_name in ['first_decision', 'second_decision', 'decision_pred']: 
            decision = None if label_name not in labels else labels_data[labels_i][labels_df_idx[label_name]]
            if not pd.isnull(decision):
                decision_type = 'true'
                if label_name == 'decision_pred':
                    decision_type = 'prediction'
                
                lines = 2
                filler = '\n'
                if i in verts_occupied:
                    filler = '\n' * (verts_occupied[i] + 1)
                
                color = colors[decision]
                txt = [f'{filler}---------------------------------------',
                       f'{decision_type} {label_name}: {decision}']

                if f'best_profit_{label_name}' in labels:
                    profit = labels_data[labels_i][labels_df_idx[f'best_profit_{label_name}']] 
                    txt.append(f'best profit: {profit}')
                    lines+=1

                    if f'profit_peak_{label_name}' in labels:
                        peak_idx = int(labels_data[labels_i][labels_df_idx[f'profit_peak_{label_name}']])
                        plot_idx = peak_idx - start
                        txt.append(f'best profit datetime: {feat_data[peak_idx][feat_df_idx["datetime"]].strftime("%Y-%m-%d %H:%M")}')
                        lines+=1

                        if plot_idx < plot_data_len:
                            peak_close = feat_data[peak_idx][feat_df_idx['Close']]   
                            ax.plot(plot_idx, peak_close, marker='o', markersize=12, color='black')
                            filler_2 = '\n'
                            if plot_idx in verts_occupied:
                                filler_2 = ' \n' * (verts_occupied[plot_idx] + 1)
                                verts_occupied[plot_idx] += 2
                            else:
                                verts_occupied[plot_idx] = 2
                            ax.text(x=plot_idx, y=peak_close, color=color, verticalalignment='top',
                                    s=f'{filler_2}closed {decision_type} {decision} from '
                                      f'{feat_data[labels_i][feat_df_idx["datetime"]].strftime("%Y-%m-%d %H:%M")}\nprofit: {profit}')

                if f'ticks_till_best_profit_{label_name}' in labels:
                    ticks = int(labels_data[labels_i][labels_df_idx[f'ticks_till_best_profit_{label_name}']]) 
                    txt.append(f'ticks till best: {ticks}')
                    lines+=1

                if 'causes' in labels and not printed_causes:
                    causes = labels_data[labels_i][labels_df_idx['causes']] 
                    txt.append(f'causes: {causes}')
                    printed_causes = True
                    lines+=1
                
                txt = '\n'.join(txt)
                ax.plot(i, close, marker='o', markersize=12, color='black')
                ax.text(x=i, y=close, color=color, verticalalignment='top',
                        s=txt)
            
                if i in verts_occupied:
                    verts_occupied[i] += lines
                else:
                    verts_occupied[i] = lines

def add_extra_data_to_plot(ax, extra_df, plot_range):
    start, stop = plot_range
    extra_df = extra_df.iloc[start:stop+1]
    for col in extra_df:
        ax.plot(extra_df[col].to_numpy(), label=col)

# define helper functions and classes for generating features

In [4]:
def add_features(df, inplace=False, negative_bears=True, include_most_recent_feats=False):
    
    ### temporal features
    
    quarters = []
    days_of_week = []
    months = []
    days = []
    minutes = []
    hours = []
    years = []
    
    ### ichimoku features
    
    is_price_above_cb_lines = []
    is_price_above_cloud = []
    is_price_inside_cloud = []
    is_price_below_cloud = []
    cloud_breakout_bull = []
    cloud_breakout_bear = []
    ticks_since_cloud_breakout_bull = []
    ticks_since_cloud_breakout_bear = []
    
    first_kumo_breakout_bull = False
    first_kumo_breakout_bear = False
    
    # names of each cross type
    cross_names = ['tk_cross','tk_price_cross','senkou_cross','chikou_cross']
    # dict to hold similar features of each cross type
    crosses_dict = {} 
    for name in cross_names:
        crosses_dict[name] = {
            'most_recent_bull_strength': [],
            'most_recent_bear_strength': [],
            'bull_strength': [],
            'bear_strength': [],
            'ticks_since_bull': [],
            'ticks_since_bear': [],
            'most_recent_bull_length': [],
            'most_recent_bear_length': [],
            'bull_length': [],
            'bear_length': [],
            'first_bull': False,
            'first_bear': False
        }
    
    data = df.to_numpy()
    feature_indices = {df.columns[i]:i for i in range(len(df.columns))}
    
    fg = FeatuteGenerator(data, feature_indices)
    for i in range(len(data)):
        # get temporal features signals
        temporal_features = fg.get_temporal_features(i)
        quarters.append(temporal_features.quarter)
        days_of_week.append(temporal_features.day_of_week)
        months.append(temporal_features.month)
        days.append(temporal_features.day)
        minutes.append(temporal_features.minute)
        hours.append(temporal_features.hour)
        years.append(temporal_features.year)
        
        # get ichimoku signals
        ichimoku_features = fg.get_ichimoku_features(i, cross_length_limit=np.Inf)
        is_price_above_cb_lines.append(ichimoku_features['is_price_above_cb_lines'])
        is_price_above_cloud.append(ichimoku_features['is_price_above_cloud'])
        is_price_inside_cloud.append(ichimoku_features['is_price_inside_cloud'])
        is_price_below_cloud.append(ichimoku_features['is_price_below_cloud'])
        
        # handle kumo breakout
        cloud_breakout_bull.append(ichimoku_features['cloud_breakout_bull'])
        cloud_breakout_bear.append(ichimoku_features['cloud_breakout_bear']) 
        
        if ichimoku_features['cloud_breakout_bull']:
            first_kumo_breakout_bull = True
        if ichimoku_features['cloud_breakout_bear']:
            first_kumo_breakout_bear = True
        
        if first_kumo_breakout_bull:
            if ichimoku_features['cloud_breakout_bull']:
                ticks_since_cloud_breakout_bull.append(0)
            else:
                ticks_since_cloud_breakout_bull.append(ticks_since_cloud_breakout_bull[-1] + 1)
        else:
            ticks_since_cloud_breakout_bull.append(None)
        
        if first_kumo_breakout_bear:
            if ichimoku_features['cloud_breakout_bear']:
                ticks_since_cloud_breakout_bear.append(0)
            else:
                ticks_since_cloud_breakout_bear.append(ticks_since_cloud_breakout_bear[-1] + 1)
        else:
            ticks_since_cloud_breakout_bear.append(None)
        
        # handle other ichimoku cloud crosses
        for cross_name in crosses_dict:
            cross_dict = crosses_dict[cross_name]
            
            bull_strength, bear_strength, cross_length = ichimoku_features[cross_name]
            
            if bull_strength > 0:
                cross_dict['first_bull'] = True 
            if bear_strength > 0:
                cross_dict['first_bear'] = True
            
            if cross_dict['first_bull']:
                if bull_strength > 0:
                    cross_dict['most_recent_bull_strength'].append(bull_strength)
                    cross_dict['bull_strength'].append(bull_strength)
                    cross_dict['ticks_since_bull'].append(0)
                    cross_dict['most_recent_bull_length'].append(cross_length)
                    cross_dict['bull_length'].append(cross_length)
                else:
                    cross_dict['most_recent_bull_strength'].append(cross_dict['most_recent_bull_strength'][-1])
                    cross_dict['bull_strength'].append(0)
                    cross_dict['ticks_since_bull'].append(cross_dict['ticks_since_bull'][-1] + 1)
                    cross_dict['most_recent_bull_length'].append(cross_dict['most_recent_bull_length'][-1])
                    cross_dict['bull_length'].append(0)
            else:
                cross_dict['most_recent_bull_strength'].append(None)
                cross_dict['bull_strength'].append(None)
                cross_dict['ticks_since_bull'].append(None)
                cross_dict['most_recent_bull_length'].append(None)
                cross_dict['bull_length'].append(None)
            
            if cross_dict['first_bear']:
                if bear_strength > 0: 
                    if negative_bears:
                        bear_strength *= -1
                    cross_dict['most_recent_bear_strength'].append(bear_strength)
                    cross_dict['bear_strength'].append(bear_strength)
                    cross_dict['ticks_since_bear'].append(0)
                    cross_dict['most_recent_bear_length'].append(cross_length)
                    cross_dict['bear_length'].append(cross_length)
                else:
                    cross_dict['most_recent_bear_strength'].append(cross_dict['most_recent_bear_strength'][-1])
                    cross_dict['bear_strength'].append(0)
                    cross_dict['ticks_since_bear'].append(cross_dict['ticks_since_bear'][-1] + 1)
                    cross_dict['most_recent_bear_length'].append(cross_dict['most_recent_bear_length'][-1])
                    cross_dict['bear_length'].append(0)
            else:
                cross_dict['most_recent_bear_strength'].append(None)
                cross_dict['bear_strength'].append(None)
                cross_dict['ticks_since_bear'].append(None)
                cross_dict['most_recent_bear_length'].append(None)
                cross_dict['bear_length'].append(None)
    
    if not inplace:
        df = df.copy()
    
    df['quarter'] = quarters
    df['day_of_week'] = days_of_week
    df['month'] = months
    df['day'] = days
    df['minute'] = minutes
    df['hour'] = hours
    df['year'] = years
    df['is_price_above_cb_lines'] = is_price_above_cb_lines
    df['is_price_above_cloud'] = is_price_above_cloud
    df['is_price_inside_cloud'] = is_price_inside_cloud
    df['is_price_below_cloud'] = is_price_below_cloud
    df['cloud_breakout_bull'] = cloud_breakout_bull
    df['cloud_breakout_bear'] = cloud_breakout_bear
    
    if include_most_recent_feats:
        df['ticks_since_cloud_breakout_bull'] = ticks_since_cloud_breakout_bull
        df['ticks_since_cloud_breakout_bear'] = ticks_since_cloud_breakout_bear
        
    for cross_name in crosses_dict:
        df[f'{cross_name}_bull_strength'] = crosses_dict[cross_name]['bull_strength']
        df[f'{cross_name}_bear_strength'] = crosses_dict[cross_name]['bear_strength']
        df[f'{cross_name}_bull_length'] = crosses_dict[cross_name]['bull_length']
        df[f'{cross_name}_bear_length'] = crosses_dict[cross_name]['bear_length']
        
        if include_most_recent_feats:
            df[f'{cross_name}_most_recent_bull_strength'] = crosses_dict[cross_name]['most_recent_bull_strength']
            df[f'{cross_name}_most_recent_bear_strength'] = crosses_dict[cross_name]['most_recent_bear_strength']
            df[f'{cross_name}_ticks_since_bull'] = crosses_dict[cross_name]['ticks_since_bull']
            df[f'{cross_name}_ticks_since_bear'] = crosses_dict[cross_name]['ticks_since_bear']
            df[f'{cross_name}_most_recent_bull_length'] = crosses_dict[cross_name]['most_recent_bull_length']
            df[f'{cross_name}_most_recent_bear_length'] = crosses_dict[cross_name]['most_recent_bear_length']

    return df

In [5]:
class FeatuteGenerator:
    def __init__(self, data, feature_indices):
        self.data = data
        self.feature_indices = feature_indices
#         self.rsi_divergence_range = 30
        self.last_rsi_divergence = 0 # 0 - None, 1 - bearish, 2 -  hidden bearish, 3 - bullish, 4 - hidden bullish
        self.rsi_highs = deque()
        self.rsi_lows = deque()
        self.cross_lengths = {}
        self.price_entered_tk_region_from_top = False
        self.price_entered_tk_region_from_bot = False
        self.temporal_features = namedtuple('temporal_features', 'quarter year month day day_of_week hour minute')
        self.safe_start_idx = self._end_of_missing_data_idx(['chikou_span_visual'])
    
    def get_temporal_features(self,i):
        dt = self.data[i][self.feature_indices['datetime']]
        
        features = self.temporal_features(quarter=dt.quarter, year=dt.year, month=dt.month, day=dt.day,
                                          day_of_week=dt.dayofweek, hour=dt.hour, minute=dt.minute)
        return features
    
#     def check_rsi_divergence(self,index):
#         momentum_rsi_i = self.feature_indices['momentum_rsi']
#         if not pd.isna(self.data[index][momentum_rsi_i]):
#             rsi1 = self.data[index][momentum_rsi_i]
#             rsi2 = self.data[index-1][momentum_rsi_i]
#             rsi3 = self.data[index-2][momentum_rsi_i]
#             if rsi1 < rsi2 and rsi3 < rsi2:
#                 self.rsi_highs.appendleft((index-1,rsi2))
#                 for high in self.rsi_highs:
#                     if high[0] == index-1:
#                         continue
                    
#             elif rsi1 > rsi2 and rsi3 > rsi2:
#                 self.rsi_lows.appendleft((index-1,rsi2))

#             if len(self.rsi_highs) > 0:
#                 if self.rsi_highs[0][0] < index - self.rsi_divergence_range:
#                     self.rsi_highs.pop()
#             if len(self.rsi_lows) > 0:
#                 if self.rsi_lows[0][0] < index - self.rsi_divergence_range:
#                     self.rsi_lows.pop()           
    
    def get_ichimoku_features(self, i, cross_length_limit = 1):
        is_price_above_cb_lines = None
        is_price_above_cloud = None
        is_price_inside_cloud = None
        is_price_below_cloud = None
        cloud_top = None
        cloud_bottom = None

        # cross signals represented as tuples: (bullish strength, bearish strength, cross length)
        # - cross signal strength indicated by 0, 1, 2, 3 for none, weak, neutral, strong
        #    or just 0, 1, 3 for none, weak, strong
        # - cross length is just the number of ticks the cross occured over
        tk_cross = (0,0,0)
        tk_price_cross = (0,0,0)
        senkou_cross = (0,0,0)
        chikou_cross = (0,0,0)
        cloud_breakout_bull = False
        cloud_breakout_bear = False
        
        close = self.feature_indices['Close']
        trend_visual_ichimoku_a = self.feature_indices['trend_visual_ichimoku_a']
        trend_visual_ichimoku_b = self.feature_indices['trend_visual_ichimoku_b']
        trend_ichimoku_a = self.feature_indices['trend_ichimoku_a']
        trend_ichimoku_b = self.feature_indices['trend_ichimoku_b']
        trend_ichimoku_conv = self.feature_indices['trend_ichimoku_conv']
        trend_ichimoku_base = self.feature_indices['trend_ichimoku_base']
        chikou_span = self.feature_indices['chikou_span']
        
        cloud_top, cloud_bottom = self._get_top_and_bottom_line_idx(trend_visual_ichimoku_a,trend_visual_ichimoku_b,i)

        if not pd.isna(self.data[i][trend_ichimoku_conv]) and not pd.isna(self.data[i][trend_ichimoku_base]):
            if self.data[i][close] > self.data[i][trend_ichimoku_conv] and self.data[i][close] > self.data[i][trend_ichimoku_base]:
                is_price_above_cb_lines = True
            else:
                is_price_above_cb_lines = False
            
            if self._is_line_between_region(close, cloud_top, cloud_bottom, i):
                is_price_inside_cloud = True
                is_price_above_cloud = False
                is_price_below_cloud = False
            else:
                is_price_inside_cloud = False
                if self.data[i][close] <= self.data[i][cloud_bottom]:
                    is_price_above_cloud = False
                    is_price_below_cloud = True
                else:
                    is_price_above_cloud = True
                    is_price_below_cloud = False
        
        ### check for crosses
        
        if i >= self.safe_start_idx:
            
            ### tk cross
            
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('tk_cross', trend_ichimoku_conv,trend_ichimoku_base,i)
            
            # price cross clean through both tk region (cross == 2), or price cross through both 
            # tk region over limited amout of ticks (cross == 3 and length <= cross_length_limit)
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                
                # bullish
                if top_line_i == trend_ichimoku_conv:
                    if self._is_line_between_region(top_line_i,cloud_top,cloud_bottom,i) \
                            and self._is_line_between_region(bottom_line_i,cloud_top,cloud_bottom,i):
                        tk_cross = (2,0,length)
                    elif self.data[i][bottom_line_i] >= self.data[i][cloud_top]:
                        tk_cross = (3,0,length)
                    else:
                        tk_cross = (1,0,length)
                # bearish
                elif top_line_i == trend_ichimoku_base:
                    if self._is_line_between_region(top_line_i,cloud_top,cloud_bottom,i) \
                            and self._is_line_between_region(bottom_line_i,cloud_top,cloud_bottom,i):
                        tk_cross = (0,2,length)
                    elif self.data[i][top_line_i] <= self.data[i][cloud_bottom]:
                        tk_cross = (0,3,length)
                    else:
                        tk_cross = (0,1,length)
                else:
                    print('weird 5:', self.data[i][self.feature_indices['datetime']])
                
            ### tk price cross
            
            cross_res = self._get_cross_and_length_regions('tk_price_cross', trend_ichimoku_conv, trend_ichimoku_base,
                                                            close, close, i)
            cross, length, first_line, second_line, third_line, fourth_line = cross_res
            
            if cross == 2 or (cross == 3 and length <= cross_length_limit):
                
                # "It’s a noise zone when price is in the Cloud"
                #  https://www.tradeciety.com/the-complete-ichimoku-trading-guide-how-to-use-the-ichimoku-indicator/
                
                # bullish 
                if first_line == close:
                    if self.data[i][close] >= self.data[i][cloud_top]:
                        tk_price_cross = (3,0,length)
                    elif self.data[i][close] <= self.data[i][cloud_bottom]:
                        tk_price_cross = (1,0,length)
                # bearish
                elif fourth_line == close:
                    if self.data[i][close] >= self.data[i][cloud_top]:
                        tk_price_cross = (0,1,length)
                    elif self.data[i][close] <= self.data[i][cloud_bottom]:
                        tk_price_cross = (0,3,length)
            elif cross == 3 and length>cross_length_limit:
                print(f'cross type = {cross}, cross length = {length}, {self.data[i][self.feature_indices["datetime"]]}')
            
            ### cloud (senkou) cross
            
            # As the Senkou Spans are projected forward, the cross that triggers this signal will be 26 days ahead of the 
            # price and, hence, the actual date that the signal occurs.  The strength of the signal is determined by the 
            # relationship of the price on the date of the signal (not the trigger) to the Kumo (Cloud)
            # - https://www.ichimokutrader.com/signals.html
            
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('cloud_cross', trend_ichimoku_a, trend_ichimoku_b,i)
            
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                
                # bullish
                if top_line_i == trend_ichimoku_a:
                    if self._is_line_between_region(close,cloud_top,cloud_bottom,i):
                        senkou_cross = (2,0,length)
                    elif self.data[i][close] >= self.data[i][cloud_top]:
                        senkou_cross = (3,0,length)
                    else:
                        senkou_cross = (1,0,length)
                # bearish
                elif top_line_i == trend_ichimoku_b:
                    if self._is_line_between_region(close,cloud_top,cloud_bottom,i):
                        senkou_cross = (0,2,length)
                    elif self.data[i][close] <= self.data[i][cloud_bottom]:
                        senkou_cross = (0,3,length)
                    else:
                        senkou_cross = (0,1,length)
                else:
                    print('weird 55:', self.data[i][self.feature_indices['datetime']])
                
            ### chikou span cross

            # Note (1) that the Chikou Span must be rising when it crosses to above the price for a bull signal 
            # and falling when it crosses to below for a bear signal; just crossing the price alone is not 
            # sufficient to trigger the signal. (2) As the Chikou Span is the closing price shifted into the past, 
            # the cross that triggers this signal will be 26 days behind the price and, hence, the actual date 
            # that the signal occurs.The strength of the signal is determined by the relationship of the price 
            # on the date of the signal (not the trigger) to the Kumo (Cloud).
            # - https://www.ichimokutrader.com/signals.html
            
            # remember the chikou_span at this point is just the price 26 (or whatever chikou/senkou projection is) ticks ago
            cross, length, top_line_i, bottom_line_i = \
                self._get_cross_and_length('chikou_cross', chikou_span, close, i)
            
            if cross == 2 \
                    or (cross == 3 and length <= cross_length_limit):
                # bullish
                if top_line_i == close:
                    if self._is_line_between_region(close, cloud_top, cloud_bottom, i):
                        chikou_cross = (2,0,length)
                    elif self.data[i][close] > self.data[i][cloud_top]:
                        chikou_cross = (3,0,length)
                    else:
                        chikou_cross = (1,0,length)
                # bearish
                elif top_line_i == chikou_span:
                    if self._is_line_between_region(close, cloud_top, cloud_bottom, i):
                        chikou_cross = (0,2,length)
                    elif self.data[i][close] < self.data[i][cloud_bottom]:
                        chikou_cross = (0,3,length)
                    else:
                        chikou_cross = (0,1,length)
                else:
                    print('weird 6:', self.data[i][self.feature_indices['datetime']])
            
            ### kumo (cloud) breakout
            
            cross_res = self._get_cross_and_length_regions('kumo_breakout', cloud_top, cloud_bottom,
                                                            close, close, i)
            cross, length, first_line, second_line, third_line, fourth_line = cross_res
            
            # The Kumo Breakout signal occurs when the price leaves or crosses the Kumo (Cloud), which is why
            # we also want to check for if cross == 4 (end of overlap but not a cross)
            # - https://www.ichimokutrader.com/signals.html
            if cross == 2 or cross == 3 or cross == 4:
                # bullish 
                if first_line == close:
                    cloud_breakout_bull = True
                # bearish
                elif fourth_line == close:
                    cloud_breakout_bear = True
        
        features = {
            'is_price_above_cb_lines': is_price_above_cb_lines,
            'is_price_above_cloud': is_price_above_cloud,      
            'is_price_inside_cloud': is_price_inside_cloud,   
            'is_price_below_cloud': is_price_below_cloud,   
            'cloud_top': cloud_top,   
            'cloud_bottom': cloud_bottom,   
            'tk_cross': tk_cross,   
            'tk_price_cross': tk_price_cross,   
            'senkou_cross': senkou_cross,   
            'chikou_cross': chikou_cross,   
            'cloud_breakout_bull': cloud_breakout_bull,
            'cloud_breakout_bear': cloud_breakout_bear
        }
        
        return features
    
    def _end_of_missing_data_idx(self, exluded_features):
        exluded_features = set(exluded_features)
        safe_idx = None

        for i in range(len(self.data)):
            nan_in_row = False
            
            for feature in self.feature_indices:
                if feature in exluded_features:
                    continue
                    
                feature_i = self.feature_indices[feature]
                if isinstance(self.data[i][feature_i], float) and np.isnan(self.data[i][feature_i]):
                    safe_idx = None
                    nan_in_row = True
                    break
            
            if not nan_in_row and not safe_idx:
                safe_idx = i
        
        # add 1 because we are looking for crosses and need to look back one tick in order to do so
        return safe_idx + 1
    
    def _get_top_and_bottom_line_idx(self,line1_i,line2_i,i):
        """
        line1_i is top if line values are equal
        """
        top_line_i = line1_i
        bottom_line_i = line2_i
        if self.data[i][line1_i] < self.data[i][line2_i]:
            top_line_i = line2_i
            bottom_line_i = line1_i
        return top_line_i, bottom_line_i
    
    def _is_line_between_region(self,target_line_i,top_line_i,bottom_line_i,i):
        if self.data[i][target_line_i] > self.data[i][bottom_line_i] \
            and self.data[i][target_line_i] < self.data[i][top_line_i]:
            return True
        return False
    
    def _get_cross_and_length_regions(self, cross_name, r1_line1, r1_line2, r2_line1, r2_line2, i):
        """
        cross type can be: no cross '=' (0), start of overlap '>' (1), full cross 'X' (2), end of cross '<' (3),
            or end of overlap w/ no cross (4)
        """
        
        old_r1_top, old_r1_bot = self._get_top_and_bottom_line_idx(r1_line1,r1_line2,i-1)
        old_r2_top, old_r2_bot = self._get_top_and_bottom_line_idx(r2_line1,r2_line2,i-1)
        
        r1_top, r1_bot = self._get_top_and_bottom_line_idx(r1_line1,r1_line2,i)
        r2_top, r2_bot = self._get_top_and_bottom_line_idx(r2_line1,r2_line2,i)
        
        # defines lines from top to bottom between both regions
        sorted_regions_lines = sorted([(line, self.data[i][line]) for line in [r1_top,r1_bot,r2_top,r2_bot]],
                                     key = lambda line_tuple: line_tuple[1], reverse=True)
        first_line, second_line, third_line, fourth_line = sorted_regions_lines
        
        ### check for no cross
        
        old_top_region_bot = None
    
        # region 1 is fully above region 2
        if self.data[i-1][old_r1_bot] > self.data[i-1][old_r2_top]:
            old_top_region_bot = old_r1_bot
            if self.data[i][r1_bot] > self.data[i][r2_top]:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        # region 2 is fully above region 1
        elif self.data[i-1][old_r2_bot] > self.data[i-1][old_r1_top]:
            old_top_region_bot = old_r2_bot
            if self.data[i][r2_bot] > self.data[i][r1_top]:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        ### check for full cross
        
        # region 1 crossed to below region 2
        if self.data[i-1][old_r1_bot] > self.data[i-1][old_r2_top] \
                and self.data[i][r1_top] < self.data[i][r2_bot]:
            return 2, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        # region 2 crossed to below region 1
        elif self.data[i-1][old_r2_bot] > self.data[i-1][old_r1_top] \
                and self.data[i][r2_top] < self.data[i][r1_bot]:
            return 2, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
        
        ### check for start of overlap
        
        top_region_top = top_region_bot = bot_region_top = bot_region_bot = None
        # region 1 is highest
        if self.data[i][r1_top] > self.data[i][r2_top]:
            top_region_top = r1_top
            top_region_bot = r1_bot
            bot_region_top = r2_top
            bot_region_bot = r2_bot
        # region 2 is highest
        else:
            top_region_top = r2_top
            top_region_bot = r2_bot
            bot_region_top = r1_top
            bot_region_bot = r1_bot

        # checking for start of overlap
        if cross_name not in self.cross_lengths:  
            # if the bottom line of the top region is still not defined then just consider no cross 
            if not old_top_region_bot:
                return 0, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]  
            else:
                # one region is beginning to intertwine or completely swallow the other, regardless this counts
                # as the start of an overlap
                if self.data[i][bot_region_top] <= self.data[i][top_region_top] \
                        and self.data[i][bot_region_top] >= self.data[i][top_region_bot]:
                    self.cross_lengths[cross_name] = (0, old_top_region_bot)
                    return 1, 0, first_line[0], second_line[0], third_line[0], fourth_line[0]
                print('weird 11:', self.data[i][self.feature_indices['datetime']])
        else:
            # check for continuation of overlap
            if self.data[i][bot_region_top] <= self.data[i][top_region_top] \
                    and self.data[i][bot_region_top] >= self.data[i][top_region_bot]:
                self.cross_lengths[cross_name] = (self.cross_lengths[cross_name][0] + 1, self.cross_lengths[cross_name][1])
                return 0, self.cross_lengths[cross_name][0], first_line[0], second_line[0], third_line[0], fourth_line[0]
            # otherwise, 1 region must be completely above the other
            else:
                original_top_region_bot = self.cross_lengths[cross_name][1]
                res = None
                
                # check for end of cross
                if original_top_region_bot != top_region_bot and original_top_region_bot != top_region_top:
                    res = 3
                # otherwise, end of overlap w/ no cross
                else:
                    res = 4
                
                cross_length = self.cross_lengths[cross_name][0]
                del self.cross_lengths[cross_name]
                return res, cross_length, first_line[0], second_line[0], third_line[0], fourth_line[0]

            
    def _get_cross_and_length(self, cross_name, line_index1, line_index2, index):
        """
        cross type can be: no cross '=' (0), start of overlap '>' (1), full cross 'X' (2), end of cross '<' (3),
            or end of overlap w/ no cross (4)
        """
        
        # remember that if lines are of equal values the first line argument to _get_top_and_bottom_line_idx()
        # will be returned as the top line
        old_top_line_i, old_bottom_line_i = self._get_top_and_bottom_line_idx(line_index1,line_index2,index - 1)
        top_line_i, bottom_line_i = self._get_top_and_bottom_line_idx(line_index1,line_index2,index)
        
        ## check for no cross
        
        if self.data[index][line_index1] != self.data[index][line_index2] \
                and self.data[index-1][line_index1] != self.data[index-1][line_index2] \
                and old_top_line_i == top_line_i \
                and bottom_line_i == old_bottom_line_i:
            return 0, 0, top_line_i, bottom_line_i

        ## check for full cross
        
        if old_top_line_i != top_line_i \
                and self.data[index-1][old_top_line_i] > self.data[index-1][old_bottom_line_i] \
                and self.data[index][old_top_line_i] < self.data[index][old_bottom_line_i]:
            return 2, 0, top_line_i, bottom_line_i
        
        ##check for start of overlap
        
        if cross_name not in self.cross_lengths:
            if self.data[index][line_index1] == self.data[index][line_index2]:
                self.cross_lengths[cross_name] = (0, old_top_line_i,self.data[index][self.feature_indices['datetime']])
                return 1, 0, top_line_i, bottom_line_i
            print('weird 1:', self.data[index][self.feature_indices['datetime']])
        else:
            
            ## check for continuation of overlap
            
            if self.data[index][line_index1] == self.data[index][line_index2]:
                self.cross_lengths[cross_name] = (self.cross_lengths[cross_name][0] + 1, self.cross_lengths[cross_name][1]
                                                  ,self.cross_lengths[cross_name][2])
                return 0, self.cross_lengths[cross_name][0], top_line_i, bottom_line_i
            else:
                cross_old_top_line_i = self.cross_lengths[cross_name][1]
                res = None

                ## check for end of cross
                
                if cross_old_top_line_i != top_line_i:
                    res = 3
                # otherwise, end of overlap w/ no cross
                else:
                    res = 4 

                cross_length = self.cross_lengths[cross_name][0]
                del self.cross_lengths[cross_name]
                return res,cross_length, top_line_i, bottom_line_i

# trying loading data from mt5 terminal w/ ForexMachine package

In [None]:
tick_data_filepath = gi.download_mt5_data("EURUSD", 'H1', '2012-01-02', '2020-06-06')

In [None]:
model_config = {
    'current_model':'ichi_cloud',
    'ichi_cloud':{
        'indicators': {
            'ichimoku': {
                'tenkan_period': 9,
                'kijun_period': 26,
                'chikou_period': 26,
                'senkou_b_period': 24
            },
            'rsi': {
                'periods': 14
            }
        }
    }
}
data_with_indicators_2 = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                  save_to_disk=False, 
                                                  config=model_config, 
                                                  has_headers=True,
                                                  datetime_col='datetime',
                                                  file_save_name='testing1')
data_with_ichi_2 = add_features(data_with_indicators_2)
data_with_ichi_2.tail(10)

In [None]:
crosses = ['tk_cross', 'tk_price_cross', 'senkou_cross', 'chikou_cross', 'kumo_breakout']
crosses = ['kumo_breakout']
show_data_from_range(data_with_ichi_2, '2019-01-01', '2019-02-04', main_indicator='ichimoku', sub_indicators=['rsi'], visualize_crosses=True, crosses=crosses)

In [None]:
filepath = gi.save_data_with_indicators(data_with_ichi_2,filename=f'ichimoku_sigs-{tick_data_filepath.stem}')
str(filepath)

# backtest tutorial

### high level process of using backtrader:

```python
class MyStrategy(bt.Strategy):
    def next(self):
        pass

# Instantiate Cerebro engine
cerebro = bt.Cerebro()

# add strategy to cerebro
cerebro.addstrategy(MyStrategy)

# run cerebro engine
cerebro.run()
```

In [None]:
import backtrader as bt

In [None]:
class PrintClose(bt.Strategy):
    def __init__(self):
        # keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close
    
    def log(self, txt, dt=None):
        dt = dt or self.datas[0].datetime.date(0)
        print(f'{dt.isoformat()}, {txt}')
    
    def next(self):
        # Simply log the closing price of the series from the reference
        self.log(f'Close: {self.dataclose[0]}')

class MAcrossover(bt.Strategy):
    # Moving average parameters
    params = (('pfast', 20), ('pslow', 50))
    
    def __init__(self):
        self.dataclose = self.datas[0].close
        
        # Order variable will contain ongoing order details/status
        self.order = None
        
        # Instantiate moving averages
        self.slow_sma = bt.indicators.MovingAverageSimple(self.datas[0], 
                        period=self.params.pslow)
        self.fast_sma = bt.indicators.MovingAverageSimple(self.datas[0], 
                        period=self.params.pfast)
    
    def log(self, txt, dt=None):
        dt = dt or self.datas[0].datetime.date(0)
        print(f'{dt.isoformat()} {txt}') # Comment this line when running optimization
        
    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # An activate buy/sell order has been submitted/accepted - Nothing to do
            return
        
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(f'BUY EXECUTED, size: {order.executed.size}, price: {order.executed.price}, cost: {order.executed.value}, commision: {order.executed.comm}')
                print(f'current balance: {self.broker.getvalue()}')
            elif order.issell():
                self.log(f'SELL EXECUTED, size: {order.executed.size}, price: {order.executed.price}, cost: {order.executed.value}, commision: {order.executed.comm}')
                print(f'current balance: {self.broker.getvalue()}')
            self.bar_executed = len(self)
        
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')
        
        # Reset orders
        self.order = None
    
    def notify_trade(self, trade):
        if not trade.isclosed:
            return
        
        print()
        self.log(f'OPERATION PROFIT, GROSS: {trade.pnl}, NET: {trade.pnlcomm}')
        print(f'open dt: {trade.open_datetime()} close dt: {trade.close_datetime()}')
        print(f'close price: {trade.price}')
        print(f'bar opened: {trade.baropen}, bar closed: {trade.barclose}')
        print(f'number of bars trade was open for: {trade.barlen}')
        print(f'current balance: {self.broker.getvalue()}')
        print()
    
    def next(self):
        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
#             print(f'order value: {order.value}')
#             print(f'order pprice: {order.pprice}')
#             print(f'order psize: {order.psize}')
#             print(f'order pnl: {order.pnl}')
            return
        
        # Check if we are in the market
        if not self.position:
            # We are not in the market, look for a signal to OPEN trades
            
            #If the 20 SMA is above the 50 SMA
            if self.fast_sma[0] > self.slow_sma[0] and self.fast_sma[-1] < self.slow_sma[-1]:
                self.log(f'BUY CREATE {self.dataclose[0]}')
                # Keep track of the created order to avoid a 2nd order
                self.order = self.buy()
            #Otherwise if the 20 SMA is below the 50 SMA
            elif self.fast_sma[0] < self.slow_sma[0] and self.fast_sma[-1] > self.slow_sma[-1]:
                self.log(f'SELL CREATE {self.dataclose[0]}')
                # Keep track of the created order to avoid a 2nd order
                self.order = self.sell()
        else:
            # We are already in the market, look for a signal to CLOSE trades
            if len(self) >= (self.bar_executed + 5):
                self.log(f'CLOSE CREATE {self.dataclose[0]}')
                self.order = self.close()

In [None]:
# Instantiate Cerebro engine
cerebro = bt.Cerebro()

total_time_delta = data_with_ichi_2.iloc[-1,0] - data_with_ichi_2.iloc[0,0]
from1, to1 = data_with_ichi_2.iloc[0,0], data_with_ichi_2.iloc[0,0] + total_time_delta / 2
from2, to2 = data_with_ichi_2.iloc[0,0] + total_time_delta / 2, data_with_ichi_2.iloc[-1,0]

forex_data1 = bt.feeds.GenericCSVData(
    dataname=str(filepath),
    datetime=0,
    open=1,
    high=2,
    low=3,
    close=4,
    volume=-1,
    openinterest=-1,
    fromdate=from1.to_pydatetime(),
    todate=to1.to_pydatetime(),
    timeframe=bt.TimeFrame.Minutes
)

forex_data2 = bt.feeds.GenericCSVData(
    dataname=str(filepath),
    datetime=0,
    open=1,
    high=2,
    low=3,
    close=4,
    volume=-1,
    openinterest=-1,
    fromdate=from2.to_pydatetime(),
    todate=to2.to_pydatetime(),
    timeframe=bt.TimeFrame.Minutes
)

# add data to cerebro to read over
cerebro.adddata(forex_data1)

# add strategy to cerebro
cerebro.addstrategy(MAcrossover)

# Default position size
cerebro.addsizer(bt.sizers.SizerFix, stake=5000)

In [None]:
start_balance = cerebro.broker.getvalue()

cerebro.run()

end_balance = cerebro.broker.getvalue()

pnl = end_balance - start_balance
print(f'Starting Portfolio Value: {start_balance}')
print(f'Final Portfolio Value: {end_balance}')
print(f'PnL: {pnl}')

# helper preprocessing functions

In [6]:
def disregard_rows_with_missing_data(x_df, y_df=None, ignored_x_cols=None, ignored_y_cols=None, seperate_chunks=False):
    if y_df is not None:
        if x_df.shape[0] != y_df.shape[0]:
            print(f'x_df (rows={x_df.shape[0]}) and y_df (rows={y_df.shape[0]}) do not have the same number of rows')
            return
    
    if ignored_x_cols:
        ignored_x_cols = set([x_df.columns.get_loc(col_name) for col_name in ignored_x_cols])
    else:
        ignored_x_cols = set()
        
    if y_df is not None:
        if ignored_y_cols:
            ignored_y_cols = set([y_df.columns.get_loc(col_name) for col_name in ignored_y_cols])
        else:
            ignored_y_cols = set()
        
    wanted_data = []
    cur_x_data = []
    cur_y_data = []
    
    x_data = x_df.to_numpy()
    if y_df is not None:
        y_data = y_df.to_numpy()    
    
    for i in range(len(x_data)):
        missing_data = False
        for j in range(len(x_data[i])):
            if j not in ignored_x_cols and pd.isnull(x_data[i][j]):
                missing_data = True
                break
        
        if y_df is not None and not missing_data:
            for j in range(len(y_data[i])):
                if j not in ignored_y_cols and pd.isnull(y_data[i][j]):
                    missing_data = True
                    break
        
        if not missing_data:
            cur_x_data.append(x_data[i])
            if y_df is not None:
                cur_y_data.append(y_data[i])
        elif seperate_chunks and len(cur_x_data) > 0:
            if y_df is not None:
                wanted_data.append((pd.DataFrame(cur_x_data, columns=x_df.columns), pd.DataFrame(cur_y_data, columns=y_df.columns)))
            else:
                wanted_data.append((pd.DataFrame(cur_x_data, columns=x_df.columns), None))
            cur_x_data = []
            cur_y_data = []
    
    if len(cur_x_data) > 0:
        if y_df is not None:
            wanted_data.append((pd.DataFrame(cur_x_data, columns=x_df.columns), pd.DataFrame(cur_y_data, columns=y_df.columns)))
        else:
            wanted_data.append((pd.DataFrame(cur_x_data, columns=x_df.columns), None))
    
    return wanted_data

def dummy_and_remove_features(data_df, categories_dict={}, cols_to_remove=[], include_defaults=True, keep_datetime=False):
    if include_defaults:
        cd = {
            'quarter': [1,2,3,4],
            'day_of_week': [0,1,2,3,4]
        }
        
        cols = {'spread','momentum_rsi', 'month', 'day', 'minute', 'hour', 'year', 'chikou_span_visual','chikou_span', 
                'tk_cross_bull_length', 'tk_cross_bear_length', 
                'tk_price_cross_bull_length', 'tk_price_cross_bear_length', 
                'senkou_cross_bull_length', 'senkou_cross_bear_length', 
                'chikou_cross_bull_length', 'chikou_cross_bear_length',
                'trend_visual_ichimoku_a','trend_visual_ichimoku_b',}
                #'trend_ichimoku_base','trend_ichimoku_conv', 'trend_ichimoku_a', 'trend_ichimoku_b'}
        
        if not keep_datetime:
            cols.add('datetime')
        
        cd.update(categories_dict) 
        categories_dict = cd
        cols_to_remove = set(cols_to_remove) | cols     # prios keys/vals from 2nd arg of | (or) operand
        data_df_cols_set = set(data_df.columns)
        
        categories_dict = {key: categories_dict[key] for key in categories_dict if key in data_df_cols_set}
        cols_to_remove = [col for col in cols_to_remove if col in data_df_cols_set]
    
    if len(categories_dict) > 0:
        catagorical_cols = list(categories_dict.keys())
        categories = list(categories_dict.values())

        cols_to_dummy = data_df[catagorical_cols]
        cols_to_dummy_vals = cols_to_dummy.to_numpy()  

        dummy_enc = OneHotEncoder(categories=categories, drop='first')
        dummied_vals = dummy_enc.fit_transform(cols_to_dummy_vals).toarray()
        dummy_col_names = dummy_enc.get_feature_names(catagorical_cols)

        dummied_cols_df = pd.DataFrame(dummied_vals, columns=dummy_col_names, index=data_df.index)
        data_df = pd.concat((data_df, dummied_cols_df), axis=1)

        cols_to_remove.extend(catagorical_cols)
        
    data_df = data_df.drop(cols_to_remove, axis=1)
    
    return data_df

def convert_class_labels(y_df, to_ints=True, labels_dict=None, to_numpy=False):
    if to_ints:
        if labels_dict is None:
            unique_labels = np.unique(y_df.to_numpy())
            labels_to_int = {unique_labels[i]: i  for i in range(len(unique_labels))}
        else:
            labels_to_int = {labels_dict[i]: i for i in labels_dict}
        
        new_labels = []
        for v in y_df.to_numpy():
            v = v[0]
            new_labels.append(labels_to_int[v])
        
        if not to_numpy:
            new_labels = pd.DataFrame(new_labels, columns=y_df.columns)
        else:
            new_labels = np.array(new_labels)
        
        if labels_dict is None:
            labels_dict = {labels_to_int[label]: label for label in labels_to_int}
            
        return new_labels, labels_dict
    else:
        new_labels = []
        for v in y_df.to_numpy():
            v = v[0]
            new_labels.append(labels_dict[v])
            
        if not to_numpy:
            new_labels = pd.DataFrame(new_labels, columns=y_df.columns)
        else:
            new_labels = np.array(new_labels)
        return new_labels, labels_dict

def error_rate(y_true_df, y_pred_df):
    if y_true_df.shape[0] != y_pred_df.shape[0]:
        print(f'y_true_df (rows={y_true_df.shape[0]}) and y_pred_df (rows={y_pred_df.shape[0]}) do not have the same number of rows')
        return
    d1, d2 = y_true_df.to_numpy(), y_pred_df.to_numpy()
    wrong_indices = []
    for i in range(len(d1)):
        if d1[i] != d2[i]:
            wrong_indices.append(i)
    return len(wrong_indices)/len(d1), wrong_indices

def no_missing_data_idx_range(df, early_ending_cols=[]):
    early_ending_cols = [df.columns.get_loc(col_name) for col_name in early_ending_cols]
    early_ending_cols = set(early_ending_cols)
    data = df.to_numpy()
    start_idx = None
    end_idx = None
    for i in range(len(data)):
        missing_data = False
        for j in range(len(data[i])):
            if j not in early_ending_cols and pd.isnull(data[i][j]):
                missing_data=True
                start_idx = None
                end_idx = None
                break
            elif j in early_ending_cols and pd.isnull(data[i][j]):
                if start_idx and not end_idx:
                    end_idx = i-1
        if not start_idx and not missing_data:
            start_idx = i
    if not end_idx:
        end_idx = len(data) - 1
    return start_idx, end_idx

def normalize_data(df, train_data, groups=None, normalization_terms=None):
    df = df.copy(deep=True)
    
    if train_data:
        if not groups:
            print(f'groups must be specified if train_data is true')
            return None
        
        normalization_terms = {}
        normalized = set()
        
        for group in groups:
            min_value = min(df[group].min())
            max_value = max(df[group].max())
            dict_val = (min_value, max_value)
            
            for col in group:
                df[col] = (df[col] - min_value) / (max_value - min_value)
                
                normalization_terms[col] = dict_val
                normalization_terms[df.columns.get_loc(col)] = dict_val

            normalized = normalized.union(group)
            
        for col in df:
            if col not in normalized and df[col].dtype != bool and pd.api.types.is_numeric_dtype(df[col].dtype):
                min_value = df[col].min()
                max_value = df[col].max()
                dict_val = (min_value, max_value)
                
                if min_value != max_value:
                    df[col] = (df[col] - min_value) / (max_value - min_value)
                elif min_value > 1 or min_value < 0:
                    df[col] = [0] * df.shape[0]
                
                normalization_terms[col] = dict_val
                normalization_terms[df.columns.get_loc(col)] = dict_val
                
    else:
        if not normalization_terms:
            print(f'normalization_terms must be specified if train_data is false')
            return None
        
        for col in df:
            if col in normalization_terms:
                min_value, max_value = normalization_terms[col]
                
                if min_value != max_value:
                    df[col] = (df[col] - min_value) / (max_value - min_value)
                elif min_value > 1 or min_value < 0:
                    df[col] = [0] * df.shape[0]
                
    return df, normalization_terms

def normalize_data_list(row, normalization_terms):
    new_row = []
    for col_i in range(len(row)):
        if col_i in normalization_terms:
            min_value, max_value = normalization_terms[col_i]
            
            if min_value != max_value:
                normalized = (row[col_i] - min_value) / (max_value - min_value)
            elif min_value > 1 or min_value < 0:
                normalized = 0
            
            new_row.append(normalized)
        else:
            new_row.append(row[col_i])
    return new_row
                
def apply_perc_change(df, cols, limit=None):
    df = df.copy(deep=True)
    for col in cols:
        df[col] = df[col].pct_change(limit=limit)
    return df

def apply_perc_change_list(last_row, cur_row, cols_set):
    new_row = []
    for col in range(len(last_row)):
        if col in cols_set:
            pc = (cur_row[col]/last_row[col]) - 1
            new_row.append(pc)
        else:
            new_row.append(cur_row[col])
    return new_row

def apply_moving_avg(df, cols, window):
    df = df.copy(deep=True)
    df[cols] = df[cols].rolling(window).mean()
    return df

def apply_moving_avg_q(q, cols_set):
    n_rows, n_cols = len(q), len(q[0])
    new_row = []
    for col in range(n_cols):
        if col in cols_set:
            avg = sum([row[col] for row in q]) / n_rows
            new_row.append(avg)
        else:
            new_row.append(q[-1][col])
    return new_row

def missing_labels_preprocess(x_df, y_df, y_col):
    x_df = dummy_and_remove_features(x_df)
    if y_df is not None:
        res = disregard_rows_with_missing_data(x_df, pd.DataFrame(y_df[y_col]))
    else:
        res = disregard_rows_with_missing_data(x_df, None)
    x, y = res[0]
    return x, y

def potention_profits(decisons_true, decisons_pred, decisons_true_profits):
    if decisons_true.shape[0] != decisons_pred.shape[0] != decisons_true_profits.shape[0]:
        print(f'decisons_true (rows={decisons_true.shape[0]}), decisons_pred (rows={decisons_pred.shape[0]}), and '
              f'decisons_true_profits (rows={decisons_true_profits.shape[0]}) do not have the same number of rows')
        return
    d1, d2, d3 = decisons_true.to_numpy(), decisons_pred.to_numpy(), decisons_true_profits.to_numpy()
    potential_profits = 0
    for i in range(len(d1)):
        if d1[i][0] == d2[i][0]:
             potential_profits += d3[i][0]
    return potential_profits

def get_profit(close_price, open_price, pip_value, pip_resolution, in_quote_currency):
    pips = (close_price - open_price) / pip_resolution
    # calculates profit in the quote currency (right side currency of currency pair) by default
    profit = pip_value * pips  # can be negative
    if not in_quote_currency:
        profit /= close_price
    return profit

# reference: https://www.mql5.com/en/articles/4830
def get_margin(trades, buy_label, sell_label, contract_size, leverage, tradersway_commodity, in_quote_currency, hedged_margin, trade_indices=None):
    # *_trade_tups: (lots, open price)
    
    buy_lots = 0
    sell_lots = 0
    hedged_volume_margin = 0
    uncovered_volume_margin = 0
    
    multiplier = 1
    if contract_size > hedged_margin:
        multiplier = contract_size / hedged_margin 
    
    if in_quote_currency:
        buy_price_lots = 0
        sell_price_lots = 0
        
        if trade_indices is not None:
            for trade_i in trade_indices:
                trade = trades[trade_i]
                decision_label = trade['decision_label']
                if decision_label == buy_label:
                    buy_lots += trade['lots']
                    buy_price_lots += trade['lots'] * trade['open_price']
                elif decision_label == sell_label:
                    sell_lots += trade['lots']
                    sell_price_lots += trade['lots'] * trade['open_price']
        else:
            for trade_i in trades:
                trade = trades[trade_i]
                decision_label = trade['decision_label']
                if decision_label == buy_label:
                    buy_lots += trade['lots']
                    buy_price_lots += trade['lots'] * trade['open_price']
                elif decision_label == sell_label:
                    sell_lots += trade['lots']
                    sell_price_lots += trade['lots'] * trade['open_price']

        total_lots = buy_lots + sell_lots
        wap = (buy_price_lots + sell_price_lots) / total_lots   # weighted average price

        # calculate uncovered volume margin
        if buy_lots > sell_lots:
            uncovered_lots = buy_lots - sell_lots
            uncovered_wap = buy_price_lots / buy_lots    
            uncovered_volume_margin = uncovered_wap * uncovered_lots * contract_size / leverage
        elif buy_lots < sell_lots:
            uncovered_lots = sell_lots - buy_lots
            uncovered_wap = sell_price_lots / sell_lots
            uncovered_volume_margin = uncovered_wap * uncovered_lots * contract_size / leverage

        # calculate hedged volume margin
        hedged_volume_margin = wap * min(buy_lots, sell_lots) * contract_size / multiplier / leverage
    else:
        if trade_indices is not None:
            for trade_i in trade_indices:
                trade = trades[trade_i]
                decision_label = trade['decision_label']
                if decision_label == buy_label:
                    buy_lots += trade['lots']
                elif decision_label == sell_label:
                    sell_lots += trade['lots']
        else:
            for trade_i in trades:
                trade = trades[trade_i]
                decision_label = trade['decision_label']
                if decision_label == buy_label:
                    buy_lots += trade['lots']
                elif decision_label == sell_label:
                    sell_lots += trade['lots']
        
        # calculate uncovered volume margin
        if buy_lots > sell_lots:
            uncovered_lots = buy_lots - sell_lots
            uncovered_volume_margin = uncovered_lots * contract_size / leverage
        elif buy_lots < sell_lots:
            uncovered_lots = sell_lots - buy_lots
            uncovered_volume_margin = uncovered_lots * contract_size / leverage
        
        # calculate hedged volume margin
        hedged_volume_margin = min(buy_lots, sell_lots) * contract_size / multiplier /leverage

    margin = hedged_volume_margin + uncovered_volume_margin
    if tradersway_commodity:
        margin *= 2   # idk
    return margin

In [7]:
# trades = {
#     1: {
#         'decision_label': 1,
#         'lots': 0.11,
#         'open_price': 1.22176,
#     },
#     2: {
#         'decision_label': 1,
#         'lots': 0.76,
#         'open_price': 1.22175,
#     },
#     3: {
#         'decision_label': 1,
#         'lots': 0.14,
#         'open_price': 1.22175,
#     },
#     4: {
#         'decision_label': 0,
#         'lots': 1.28,
#         'open_price': 1.22169,
#     },
#     5: {
#         'decision_label': 0,
#         'lots': 0.55,
#         'open_price': 1.22167,
#     },
# }

# get_margin(trades, buy_label=1, sell_label=0, contract_size=100000, leverage=1000, tradersway_commodity=False, in_quote_currency=True, hedged_margin=50000)

trades = {
    1: {
        'decision_label': 1,
        'lots': 1.14,
        'open_price': 1.27019,
    },
    2: {
        'decision_label': 0,
        'lots': 0.14,
        'open_price': 1.27008,
    },
    3: {
        'decision_label': 0,
        'lots': 0.51,
        'open_price': 1.27011,
    },
}

get_margin(trades, buy_label=1, sell_label=0, contract_size=100000, leverage=1000, tradersway_commodity=False, in_quote_currency=False, hedged_margin=50000,
           trade_indices=None)

81.49999999999999

# helper functions for generating labels

In [8]:
def generate_ichimoku_labels(df, min_profit_percent=0.003, profit_noise_percent=0.003, label_non_signals=False, print_debug=True,
                             signals_to_consider=None, contract_size=100_000, lots_per_trade=1, in_quote_currency=True, pip_resolution=0.0001):
    
    # when min_profit==profit_noise this turns into a binary classification problem (buy or sell, no wait)
    if profit_noise_percent == min_profit_percent:
        no_waits = True
        
    pip_value = contract_size * lots_per_trade * pip_resolution   # in quote currency (right side currency of currency pair)
    min_profit = min_profit_percent * lots_per_trade * contract_size   # in base currency (left side currency of currency pair)
    profit_noise = profit_noise_percent * lots_per_trade * contract_size   # in base currency (left side currency of currency pair) 
    
    data = df.to_numpy()
    feature_indices = {df.columns[i]:i for i in range(len(df.columns))}
    close_trade_features = []
    close_trade_labels = []
    
    # if any of these columns are equal to 0 then the corresponding signal has occured at that tick
    if not signals_to_consider:
        signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                               'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                               'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                               'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                               'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
    
    # find index at which data becomes consistant (no missing data)
    early_ending_cols = []
    if 'chikou_span_visual' in df.columns:
        early_ending_cols = ['chikou_span_visual']
    start_idx, end_idx = no_missing_data_idx_range(df, early_ending_cols=early_ending_cols)
    
    has_datetimes = True if 'datetime' in df.columns else False
    
    def get_decision_label(trade, first, current_close_price, decisions_so_far, leftover=False):
        decision = 'first' if first else 'second'
        label = None
        if trade[f'{decision}_decision_best_buy_profit'][0] > trade[f'{decision}_decision_best_sell_profit'][0]:
            # add 1 to ticks_till_peak to reserve 0 ticks for 'wait' labels
            ticks_till_peak = trade[f'{decision}_decision_best_buy_profit'][1] - trade['trade_open_tick_i'] + 1
            label = ['buy', ticks_till_peak, trade[f'{decision}_decision_best_buy_profit'][0], trade[f'{decision}_decision_best_buy_profit'][1]]
        elif trade[f'{decision}_decision_best_buy_profit'][0] < trade[f'{decision}_decision_best_sell_profit'][0]:
            ticks_till_peak = trade[f'{decision}_decision_best_sell_profit'][1] - trade['trade_open_tick_i'] + 1
            label = ['sell', ticks_till_peak, trade[f'{decision}_decision_best_sell_profit'][0], trade[f'{decision}_decision_best_sell_profit'][1]]
        
        scaled_min_profit = min_profit if not in_quote_currency else min_profit / current_close_price
        if label and label[2] < scaled_min_profit and not no_waits and not leftover:
            label = ['wait', 0, 0, None]

        if trade[f'{decision}_decision_best_buy_profit'][0] == trade[f'{decision}_decision_best_sell_profit'][0]:
            if print_debug:
                print(f'{decision} decision best buy and sell profit equal, trade: {trade}\n')
            
            if not no_waits:
                label = ['wait', 0, 0, None]
            else:
                decisions_so_far = decisions_so_far[-11:]
                num_buys = decisions_so_far.count('buy')
                num_sells = len(decisions_so_far) - num_buys
                if num_buys > num_sells:
                    ticks_till_peak = trade[f'{decision}_decision_best_buy_profit'][1] - trade['trade_open_tick_i'] + 1
                    label = ['buy', ticks_till_peak, trade[f'{decision}_decision_best_buy_profit'][0], trade[f'{decision}_decision_best_buy_profit'][1]]
                else:
                    ticks_till_peak = trade[f'{decision}_decision_best_sell_profit'][1] - trade['trade_open_tick_i'] + 1
                    label = ['sell', ticks_till_peak, trade[f'{decision}_decision_best_sell_profit'][0], trade[f'{decision}_decision_best_sell_profit'][1]]
                
        return label
    
    # now simulate hedged trades to determine labels
    labels_dict = {}   # 6 labels per label: (1) decision (buy, sell, wait) (str), (2) ticks till best profit (int), and (3) the profit (float) x2 for each decision
    trades = {}
    pending_order = None
    pending_close = None
    decisions_so_far = []
    for i, row in enumerate(data[start_idx:]):
        i += start_idx
        
        if pending_order is not None:
            pending_order_i, causes = pending_order
            open_price = data[i][feature_indices['Open']]
            signal_datetime = None if not has_datetimes else data[pending_order_i][feature_indices['datetime']]                
            trades[pending_order_i] = {
                'signal_datetime': signal_datetime,
                'open_price': open_price,
                'trade_open_tick_i': i,
                'causes': causes,
                'consider_profit': False,
                'first_decision_best_buy_profit': None, # should be tuple of size 2 where 1st elem is the profit and 2nd is the number of bars to get to that profit
                'first_decision_best_sell_profit': None,
                'second_decision_best_buy_profit': None,
                'second_decision_best_sell_profit': None,
                'first_decision_done': False,
                'second_decision_done': False,
                'first_decision_done_tick_dt': None,
                'second_decision_done_tick_dt': None,
                'first_decision_done_tick_i': None,
                'second_decision_done_tick_i': None
            }
            pending_order = None
        
        closed_trades = []
        for trade_i in trades:
            trade = trades[trade_i]
            trade_open_price = trade['open_price']
            close_price = row[feature_indices['Close']]
            last_close_price = data[i-1][feature_indices['Close']] if i-1 != trade_i else trade_open_price
            buy_profit = get_profit(close_price, trade_open_price, pip_value, pip_resolution, in_quote_currency)
            sell_profit = buy_profit * -1
            
            scaled_profit_noise = profit_noise if not in_quote_currency else profit_noise / close_price
            if abs(buy_profit) >= scaled_profit_noise:
                trade['consider_profit'] = True
                    
            if not trade['first_decision_done']:
                if not trade['first_decision_best_buy_profit'] or trade['first_decision_best_buy_profit'][0] < buy_profit:
                    trade['first_decision_best_buy_profit'] = (buy_profit, i, f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                
                if not trade['first_decision_best_sell_profit'] or trade['first_decision_best_sell_profit'][0] < sell_profit:
                    trade['first_decision_best_sell_profit'] = (sell_profit, i, f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                
                # test for end of 1st decision: see if current close price crossed the intial price at which the trade was opened at
                # note: only look for crosses after profit has exceeded profit_noise (small amounts of profit w/ respect to lots_per_trade and in_quote_currency)
                if trade['consider_profit'] and \
                        ((last_close_price < trade_open_price and close_price >= trade_open_price) \
                        or (last_close_price > trade_open_price and close_price <= trade_open_price)): 
                    label = get_decision_label(trade, current_close_price=close_price, first=True, decisions_so_far=decisions_so_far)
                    trade['first_decision_done'] = True
                    first_decision_done_tick_dt = None if not has_datetimes else data[i][feature_indices['datetime']]  
                    trade['first_decision_done_tick_dt'] = first_decision_done_tick_dt
                    trade['first_decision_done_tick_i'] = i
                    labels_dict[trade_i] = {'first_decision': label}
                    decisions_so_far.append(label[0])
                    
                    trade['consider_profit'] = False
                    
                    # at this point trade['second_decision_best_buy_profit'] should be None
                    trade['second_decision_best_buy_profit'] = (buy_profit, i, f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                    trade['second_decision_best_sell_profit'] = (sell_profit, i,f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                       
            elif not trade['second_decision_done']:
                if trade['second_decision_best_buy_profit'][0] < buy_profit:
                    trade['second_decision_best_buy_profit'] = (buy_profit, i,f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                
                if trade['second_decision_best_sell_profit'][0] < sell_profit:
                    trade['second_decision_best_sell_profit'] = (sell_profit, i,f'debug notes: ({close_price} - {trade_open_price}) / {pip_resolution} * {pip_value}')
                
                # test for end of 2nd decision: see if current close price crossed the intial price at which the trade was opened at again
                # note: only look for crosses after profit has exceeded profit_noise (small amounts of profit w/ respect to lots_per_trade and in_quote_currency)
                if trade['consider_profit'] and \
                        ((last_close_price < trade_open_price and close_price >= trade_open_price) \
                        or (last_close_price > trade_open_price and close_price <= trade_open_price)): 
                    label = get_decision_label(trade, current_close_price=close_price, first=False, decisions_so_far=decisions_so_far)
                    trade['second_decision_done'] = True
                    second_decision_done_tick_dt = None if not has_datetimes else data[i][feature_indices['datetime']]
                    trade['second_decision_done_tick_dt'] = second_decision_done_tick_dt
                    trade['second_decision_done_tick_i'] = i
                    labels_dict[trade_i]['second_decision'] = label
                    decisions_so_far.append(label[0])
                    
                    labels_dict[trade_i]['causes'] = ','.join(trade['causes'])
                    closed_trades.append(trade_i)
        
        pending_close = None
        
        for trade_i in closed_trades:
            del trades[trade_i]
            
        causes = []
        for sig in signals_to_consider:
            sig_i = feature_indices[sig]
            if int(row[sig_i]) != 0:
                causes.append(sig)
        
        if len(causes) > 0:
            pending_order = (i, causes)
            pending_close = (i, causes)
    
    # leftover open trades
    for trade_i in trades:
        trade = trades[trade_i]
        
        if not trade['first_decision_done']:
            label = get_decision_label(trade, current_close_price=data[-1][feature_indices['Close']], first=True, leftover=True, decisions_so_far=decisions_so_far)
            labels_dict[trade_i] = {'first_decision': label}
                    
        elif not trade['second_decision_done']:
            label = get_decision_label(trade, current_close_price=data[-1][feature_indices['Close']], first=False, leftover=True, decisions_so_far=decisions_so_far)
            labels_dict[trade_i]['second_decision'] = label
                    
        labels_dict[trade_i]['causes'] = ','.join(trade['causes'])

    first_decision_labels_count = 4
    second_decision_labels_count = 4
    for i in labels_dict:
        entry = labels_dict[i]
        if 'first_decision' in entry:
            if not first_decision_labels_count:
                first_decision_labels_count = len(entry['first_decision'])
            elif first_decision_labels_count != len(entry['first_decision']):
                print(f'number of 1st decision labels are not equal for each row (row {i}: {entry["first_decision"]}, ' 
                      f'changed from {first_decision_labels_count} to {len(entry["first_decision"])},)!')
                return None
        if 'second_decision' in entry:
            if not second_decision_labels_count:
                second_decision_labels_count = len(entry['second_decision'])
            elif second_decision_labels_count != len(entry['second_decision']):
                print(f'number of 2nd decision labels are not equal for each row (row {i}: {entry["second_decision"]}, ' 
                      f'changed from {second_decision_labels_count} to {len(entry["second_decision"])})!')
                return None
    
    labels = []
    for i in range(len(data)):
        if i in labels_dict:
            entry = labels_dict[i] 
            causes_label = entry['causes']
            
            first_decision_labels = [None] * first_decision_labels_count
            second_decision_labels = [None] * second_decision_labels_count
            if 'first_decision' in entry:
                first_decision_labels = entry['first_decision']
            if 'second_decision' in entry:
                second_decision_labels = entry['second_decision']
                            
            label = [*first_decision_labels, *second_decision_labels, causes_label]

            labels.append(label)
        
        # just assign 'wait' labels to rows of data where no ichimoku signlas occured if label_non_signals==True
        elif label_non_signals:
            labels.append(['wait', 0, 0, None,'wait', 0, 0, None, None])
            
        # otherwise just put None labels
        else:
            labels.append([None] * (first_decision_labels_count + second_decision_labels_count + 1)) # +1 for causes label
    
    label_names = ['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
                   'second_decision','ticks_till_best_profit_second_decision', 'best_profit_second_decision', 'profit_peak_second_decision',
                   'causes']
    
    labels_df = pd.DataFrame(labels, columns=label_names)
    return labels_df

In [9]:
# def compare_labels_dataframes(df1, df2, print_each_line=True, only_show_diff_rows=True):
#     data1 = tuple(df1.itertuples())
#     data2 = tuple(df2.itertuples())
    
#     if len(data1) != len(data2):
#         print('dataframes do not have equal number of rows')
    
#     diff_rows = 0
#     for row1, row2 in zip(data1, data2):
#         row1 = tuple(row1)
#         row2 = tuple(row2)
#         all_equal = True
#         diff_cols = []
        
#         for i in range(max(len(row1), len(row2))):
#             if pd.isnull(row1[i]) and pd.isnull(row2[i]):
#                 continue
#             elif isinstance(row1[i], float) and isinstance(row2[i], float):
#                 if round(row1[i], 3) != round(row2[i], 3):
#                     all_equal = False
#                     diff_cols.append(i)
#             else:
#                 if row1[i] != row2[i]:
#                     all_equal = False
#                     diff_cols.append(i)
            
#         if not all_equal:
#             diff_rows += 1
        
#         if print_each_line and (not only_show_diff_rows or (only_show_diff_rows and not all_equal)):
#             print(f'df1 row: {row1}')
#             print(f'df2 row: {row2}')
#             print(f'df1 row types: {[type(x) for x in row1]}')
#             print(f'df2 row types: {[type(x) for x in row2]}')
#             print(f'same rows: {all_equal}')
#             print(f'different column indices: {diff_cols}\n')

#     print(f'number of diff rows: {diff_rows}')
    
# d1 = generate_ichimoku_labels(data_with_ichi_2)
# # d1.to_csv('./test1.csv')
# d2 = generate_ichimoku_labels(data_with_ichi_2, min_profit_percent=0.0001, profit_noise_percent=0)
# # d2.to_csv('./test2.csv')
# d3 = generate_ichimoku_labels(data_with_ichi_2, label_non_signals=True)
# # d3.to_csv('./test3.csv')
# compare_labels_dataframes(d1, d2)

# XGBoost model

In [10]:
import xgboost as xgb

#### try out different models w/ diff hyperparams

In [None]:
"""
XGBoost param tuning guide:
https://towardsdatascience.com/fine-tuning-xgboost-in-python-like-a-boss-b4543ed8b1e
"""

contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
in_quote_currency = True
pip_resolution = 0.0001
labels_dict = {1: 'buy', 0: 'sell'}

profit_percentages = [(pp/1000,pp/1000) for pp in range(1,101,3)]

param_grid = {
    'ichi_settings': [(9,26,52),(8,22,24),(9,30,60)],
    'labeling_params': [{
        'label_non_signals': [False],
        'profit_percentages': profit_percentages,
        'lots_per_trade': [0.2],
    }],
    'xgboost_params': [{
        'n_estimators': [3000],
        'max_depth': [2],
        'learning_rate': [0.1],
        'subsample': [1],
        'colsample_bytree': [1],
        'gamma': [1]
    }]
}

param_grid = ParameterGrid(param_grid)
param_grid = random.sample(list(param_grid), len(param_grid))

In [None]:
filepath = gi.download_mt5_data("EURUSD", 'H1', '2012-01-02', '2020-12-18')
train_split = 0.7
results = []
best_params_first_decision = None
best_score_first_decision = None
best_params_second_decision = None
best_score_second_decision = None
num_class = 3 # buy, sell, wait
signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']

start_time = time.time()
for i, params in enumerate(param_grid):
    ichi_settings = params['ichi_settings']
    labeling_params = params['labeling_params']
    xgboost_params = params['xgboost_params']
    
    labeling_params = ParameterGrid(labeling_params)
    labeling_params = random.sample(list(labeling_params), len(labeling_params))
    xgboost_params = ParameterGrid(xgboost_params)
    xgboost_params = random.sample(list(xgboost_params), len(xgboost_params))
        
    model_config = {
        'current_model':'ichi_cloud',
        'ichi_cloud':{
            'indicators': {
                'ichimoku': {
                    'tenkan_period': ichi_settings[0],
                    'kijun_period': ichi_settings[1],
                    'chikou_period': ichi_settings[1],
                    'senkou_b_period': ichi_settings[2]
                },
                'rsi': {
                    'periods': 14
                }
            }
        }
    }
    
    # load in and split data
    
    data_with_ta_indicators = gi.add_indicators_to_raw(filepath=filepath, 
                                                       save_to_disk=False, 
                                                       config=model_config, 
                                                       has_headers=True,
                                                       datetime_col='datetime')
    data_with_ichi_signals = add_features(data_with_ta_indicators)
    start_idx, end_idx = no_missing_data_idx_range(data_with_ichi_signals, early_ending_cols=['chikou_span_visual'])
    data_with_ichi_signals = data_with_ichi_signals[start_idx:].reset_index(drop=True)
    
    if train_split > 1:
        print(f'train_split ({train_split}) is greater than 1, stopping.')
    
    train_p = train_split
    num_rows = len(data_with_ichi_signals)
    train_data_count = int(train_p * num_rows)
    
    train_data_orig = data_with_ichi_signals.iloc[:train_data_count]
    validation_data_orig = data_with_ichi_signals.iloc[train_data_count:]
    
    for j, label_params in enumerate(labeling_params):
        label_non_signals = label_params['label_non_signals']
        min_profit_percent, profit_noise_percent = label_params['profit_percentages']
        lots_per_trade = label_params['lots_per_trade']
    
        # generate labels for data

        train_data_labels = generate_ichimoku_labels(train_data_orig, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                                     profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                                     contract_size=contract_size, lots_per_trade=lots_per_trade,
                                                     in_quote_currency=in_quote_currency,pip_resolution=pip_resolution, print_debug=False)
        validation_data_labels = generate_ichimoku_labels(validation_data_orig, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                                          profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                                          contract_size=contract_size, lots_per_trade=lots_per_trade,
                                                          in_quote_currency=in_quote_currency, pip_resolution=pip_resolution, print_debug=False)
        
        train_data = apply_perc_change(train_data_orig, cols=pc_cols, limit=1)
        start_idx, end_idx = no_missing_data_idx_range(train_data, early_ending_cols=['chikou_span_visual'])
        train_data = train_data.iloc[start_idx:end_idx+1]
        train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]
        
        validation_data = apply_perc_change(validation_data_orig, cols=pc_cols, limit=1)
        start_idx, end_idx = no_missing_data_idx_range(validation_data, early_ending_cols=['chikou_span_visual'])
        validation_data = validation_data.iloc[start_idx:end_idx+1]
        validation_data_labels = validation_data_labels.iloc[start_idx:end_idx+1]

        x_train_first_decisions, y_train_first_decisions = missing_labels_preprocess(train_data, train_data_labels, 'first_decision')
        x_valid_first_decisions, y_valid_first_decisions = missing_labels_preprocess(validation_data, validation_data_labels, 'first_decision')
        x_train_first_decisions_profits, y_train_first_decisions_profits = missing_labels_preprocess(train_data, train_data_labels, 
                                                                                                     'best_profit_first_decision')
        x_valid_first_decisions_profits, y_valid_first_decisions_profits = missing_labels_preprocess(validation_data, validation_data_labels, 
                                                                                                     'best_profit_first_decision')

        x_train_second_decisions, y_train_second_decisions = missing_labels_preprocess(train_data, train_data_labels, 'second_decision')
        x_valid_second_decisions, y_valid_second_decisions = missing_labels_preprocess(validation_data, validation_data_labels, 'second_decision')
        x_train_second_decisions_profits, y_train_second_decisions_profits = missing_labels_preprocess(train_data, train_data_labels, 
                                                                                                       'best_profit_second_decision')
        x_valid_second_decisions_profits, y_valid_second_decisions_profits = missing_labels_preprocess(validation_data, validation_data_labels, 
                                                                                                       'best_profit_second_decision')

        # generate predictions w/ XGBoost model
        for k, xgb_params in enumerate(xgboost_params):
            n_estimators = xgb_params['n_estimators']
            max_depth = xgb_params['max_depth']
            learning_rate = xgb_params['learning_rate']
            subsample = xgb_params['subsample']
            colsample_bytree = xgb_params['colsample_bytree']
            gamma = xgb_params['gamma']
            
            if min_profit_percent==profit_noise_percent:
                # binrary classification problem (buy or sell)
                error_metric_name = 'error'
                xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'binary:logistic', 'eval_metric': error_metric_name, 
                              'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
            else:
                # multi-class classification problem (buy, sell, or wiat)
                error_metric_name = 'merror'
                xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'multi:softmax', 'num_class': num_class,
                              'eval_metric': error_metric_name, 'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
            
            ### first decisions

            y_train_true, labels_dict = convert_class_labels(y_train_first_decisions, labels_dict=labels_dict)
            y_valid_true, labels_dict = convert_class_labels(y_valid_first_decisions, labels_dict=labels_dict)

            dtrain = xgb.DMatrix(x_train_first_decisions, label=y_train_true)
            dvalidation = [(xgb.DMatrix(x_train_first_decisions, label=y_train_true),'train'), 
                           (xgb.DMatrix(x_valid_first_decisions, label=y_valid_true),'validation')]
            dtest = xgb.DMatrix(x_valid_first_decisions)
            
            evals_result = {}
            decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, 
                                           evals_result=evals_result, verbose_eval=False)
            
            train_error = evals_result['train'][error_metric_name][-1]
            train_accuracy_first_decision = 1 - train_error
            
            validation_error = evals_result['validation'][error_metric_name][-1]
            validation_accuracy_first_decision = 1 - validation_error
            
            y_test_probs = decision_predictor.predict(dtest)
            y_test_preds = np.around(y_test_probs)
            y_test_preds = pd.DataFrame(y_test_preds, columns=y_valid_true.columns)
            y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]
            p_profits_first_decision = potention_profits(y_valid_first_decisions, y_test_preds, y_valid_first_decisions_profits)

            ### second decisions

            y_train_true, labels_dict = convert_class_labels(y_train_second_decisions, labels_dict=labels_dict)
            y_valid_true, labels_dict = convert_class_labels(y_valid_second_decisions, labels_dict=labels_dict)

            dtrain = xgb.DMatrix(x_train_second_decisions, label=y_train_true)
            dvalidation = [(xgb.DMatrix(x_train_second_decisions, label=y_train_true),'train'), 
                           (xgb.DMatrix(x_valid_second_decisions, label=y_valid_true),'validation')]
            dtest = xgb.DMatrix(x_valid_second_decisions)
            
            evals_result = {}
            decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, 
                                           evals_result=evals_result, verbose_eval=False)

            train_error = evals_result['train'][error_metric_name][-1]
            train_accuracy_second_decision = 1 - train_error
            
            validation_error = evals_result['validation'][error_metric_name][-1]
            validation_accuracy_second_decision = 1 - validation_error
            
            y_test_probs = decision_predictor.predict(dtest)
            y_test_preds = np.around(y_test_probs)
            y_test_preds = pd.DataFrame(y_test_preds, columns=y_valid_true.columns)
            y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]
            p_profits_second_decision = potention_profits(y_valid_second_decisions, y_test_preds, y_valid_second_decisions_profits)
        
            all_params = {
                'tenkan_period': ichi_settings[0],
                'kijun_period': ichi_settings[1],
                'chikou_period': ichi_settings[1],
                'senkou_b_period': ichi_settings[2],
                'label_non_signals': label_non_signals,
                'min_profit_percent': min_profit_percent,
                'profit_noise_percent': profit_noise_percent,
                'lots_per_trade': lots_per_trade,
                'n_estimators': n_estimators,
                'max_depth': max_depth,
                'learning_rate': learning_rate,
                'subsample': subsample,
                'colsample_bytree': colsample_bytree,
                'gamma': gamma,
                'train_accuracy_first_decision': train_accuracy_first_decision,
                'validation_accuracy_first_decision': validation_accuracy_first_decision,
                'train_accuracy_second_decision': train_accuracy_second_decision,
                'validation_accuracy_second_decision': validation_accuracy_second_decision,
                'potention_profits_first_decision': p_profits_first_decision,
                'potention_profits_second_decision': p_profits_second_decision
            }
            
            first_decision_score = validation_accuracy_first_decision
            second_decision_score = validation_accuracy_second_decision
            
            if not best_score_first_decision or first_decision_score > best_score_first_decision:
                best_score_first_decision = first_decision_score
                best_params_first_decision = all_params
                
            if not best_score_second_decision or second_decision_score > best_score_second_decision:
                best_score_second_decision = second_decision_score
                best_params_second_decision = all_params
            
            results.append(all_params)
            
            print('--------------------------------------------------------------------')
            print(f'{k+1}/{len(xgboost_params)} xgb params evaulated')
            print('--------------------------------------------------------------------\n')
            print(f'last params evaluated:')
            print(f'{all_params}\n')
            print(f'best first decision params evaluated:')
            print(f'{best_params_first_decision}\n')
            print(f'best second decision params evaluated:')
            print(f'{best_params_second_decision}\n')

        print('--------------------------------------------------------------------')
        print(f'{j+1}/{len(labeling_params)} labeling params evaulated')
        print('--------------------------------------------------------------------\n')
        
        results_sorted = sorted(results, key=lambda d: d['validation_accuracy_first_decision'], reverse=True)
        results_sorted_df = pd.DataFrame(results_sorted)
        results_sorted_df.to_csv('../my_stuff/grid_search_results.csv')
        
    print('--------------------------------------------------------------------')
    print(f'{i+1}/{len(param_grid)} ichimoku settings evaulated')
    print('--------------------------------------------------------------------\n')
    
results_sorted = sorted(results, key=lambda d: d['validation_accuracy_first_decision'], reverse=True)
results_sorted_df = pd.DataFrame(results_sorted)
results_sorted_df.to_csv('../my_stuff/grid_search_results.csv')
print(f'runtime: {(time.time()-start_time)/60} min')

#### train model for backtesting

In [33]:
# hyperparameters

label_non_signals = False
min_profit_percent, profit_noise_percent = 0.01, 0.01
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
currency_side = 'right'
in_quote_currency = True if currency_side == 'right' else False
pip_resolution = 0.0001

labels_dict = {1: 'buy', 0: 'sell'}
n_estimators = 3000
max_depth = 2
learning_rate = 0.1
subsample = 1
colsample_bytree = 1
gamma = 1
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
model_config = {
    'current_model':'ichi_cloud',
    'ichi_cloud':{
        'indicators': {
            'ichimoku': {
                'tenkan_period': tenkan_period,
                'kijun_period': kijun_period,
                'chikou_period': kijun_period,
                'senkou_b_period': senkou_b_period
            },
            'rsi': {
                'periods': 14
            }
        }
    }
}
signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
sigs_for_filename = 'cb-tk-tkp-sen-chi'

# get data

cur_pair = 'EURUSD'
timeframe = 'H1'
tick_data_filepath = gi.download_mt5_data(cur_pair, timeframe, '2011-01-01', '2020-10-01')
data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                save_to_disk=True, 
                                                config=model_config, 
                                                has_headers=True,
                                                datetime_col='datetime')
train_data = add_features(data_with_indicators)

train_data_labels = generate_ichimoku_labels(train_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
train_data = apply_perc_change(train_data, cols=pc_cols, limit=1)
start_idx, end_idx = no_missing_data_idx_range(train_data)
train_data = train_data.iloc[start_idx:end_idx+1]
train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]

x_train_first_decisions, y_train_first_decisions = missing_labels_preprocess(train_data, train_data_labels, 'first_decision')

loaded 60431 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv
saved 60431 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv, done.


In [34]:
# train model
y_train_true, labels_dict = convert_class_labels(y_train_first_decisions, labels_dict=labels_dict)

dtrain = xgb.DMatrix(x_train_first_decisions, label=y_train_true)
dvalidation = [(xgb.DMatrix(x_train_first_decisions, label=y_train_true),'train')]

if min_profit_percent==profit_noise_percent:
    # binrary classification problem (buy or sell)
    error_metric_name = 'error'
    xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'binary:logistic', 'eval_metric': error_metric_name, 'gamma':gamma,
                  'colsample_bytree':colsample_bytree, 'subsample':subsample}
else:
    # multi-class classification problem (buy, sell, or wiat)
    error_metric_name = 'merror'
    xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'multi:softmax', 'num_class': num_class,
                  'eval_metric': error_metric_name, 'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
evals_result = {}
xgb_first_decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, evals_result=evals_result)

# print train error
train_error = evals_result['train']['error'][-1]
print(f'\ntrain error: {train_error}')
print(f'train accuracy: {1 - train_error}')

# save model
xgb_first_decision_predictor.save_model(f'../my_stuff/{cur_pair}-{timeframe}_{min_profit_percent}-min_profit_{lots_per_trade}-lots_{currency_side}-cur_side'
                                        f'_{tenkan_period}-{kijun_period}-{senkou_b_period}-{sigs_for_filename}-ichi_xgb_classifier.json')

[0]	train-error:0.45984
[1]	train-error:0.45647
[2]	train-error:0.45815
[3]	train-error:0.45771
[4]	train-error:0.45984
[5]	train-error:0.45771
[6]	train-error:0.45771
[7]	train-error:0.45753
[8]	train-error:0.45771
[9]	train-error:0.45584
[10]	train-error:0.45771
[11]	train-error:0.45815
[12]	train-error:0.45735
[13]	train-error:0.45718
[14]	train-error:0.45744
[15]	train-error:0.45744
[16]	train-error:0.45744
[17]	train-error:0.45620
[18]	train-error:0.45593
[19]	train-error:0.45522
[20]	train-error:0.45238
[21]	train-error:0.45274
[22]	train-error:0.45354
[23]	train-error:0.45327
[24]	train-error:0.45336
[25]	train-error:0.45425
[26]	train-error:0.45363
[27]	train-error:0.45300
[28]	train-error:0.45150
[29]	train-error:0.45123
[30]	train-error:0.44732
[31]	train-error:0.44697
[32]	train-error:0.44510
[33]	train-error:0.44555
[34]	train-error:0.44484
[35]	train-error:0.44413
[36]	train-error:0.44457
[37]	train-error:0.44448
[38]	train-error:0.44546
[39]	train-error:0.44413
[40]	train

In [35]:
# test model on test data
tick_data_filepath = gi.download_mt5_data("EURUSD", 'H1', '2020-10-02', '2020-12-18')
data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                save_to_disk=True, 
                                                config=model_config, 
                                                has_headers=True,
                                                datetime_col='datetime')
test_data = add_features(data_with_indicators)

test_data_labels = generate_ichimoku_labels(test_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

test_data = apply_perc_change(test_data, cols=pc_cols, limit=1)
start_idx, end_idx = no_missing_data_idx_range(test_data, early_ending_cols=['chikou_span_visual'])
test_data = test_data.iloc[start_idx:end_idx+1]
test_data_labels = test_data_labels.iloc[start_idx:end_idx+1]

x_test_first_decisions, y_test_first_decisions = missing_labels_preprocess(test_data, test_data_labels, 'first_decision')
x_test_first_decisions_profits, y_test_first_decisions_profits = missing_labels_preprocess(test_data, test_data_labels, 'best_profit_first_decision')

y_test_true, labels_dict = convert_class_labels(y_test_first_decisions, to_numpy=True, labels_dict=labels_dict)

dtest = xgb.DMatrix(x_test_first_decisions)
y_test_probs = xgb_first_decision_predictor.predict(dtest)

y_test_preds = np.around(y_test_probs)
y_test_preds = pd.DataFrame(y_test_preds, columns=y_test_first_decisions.columns)
y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]

# print results
test_error, test_wrong_indices = error_rate(y_test_first_decisions, y_test_preds)
p_profits_first_decision = potention_profits(y_test_first_decisions, y_test_preds, y_test_first_decisions_profits)

print(f'\ntest error: {test_error}')
print(f'test accuracy: {1 - test_error}')
print(f'potential profits from test data: {p_profits_first_decision}')
print(f'buy/sell counts:\n{y_test_preds["first_decision"].value_counts()}')

x = x_test_first_decisions.to_numpy()
ytp = y_test_preds["first_decision"].to_numpy()

sell_inputs = []
for i in range(len(ytp)):
    if ytp[i] == 'sell':
        sell_inputs.append(x[i])

loaded 1320 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2020-10-02T00;00UTC_to_2020-12-18T00;00UTC.csv
saved 1320 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2020-10-02T00;00UTC_to_2020-12-18T00;00UTC.csv, done.

test error: 0.32158590308370044
test accuracy: 0.6784140969162995
potential profits from test data: 81816.39999999983
buy/sell counts:
buy     193
sell     34
Name: first_decision, dtype: int64


#### analyze binary probs

In [36]:
print(labels_dict,'\n')
test_wrong_indices = set(test_wrong_indices)
y_test_preds_np = np.around(y_test_probs)
wrong_probs_diffs = []
correct_probs_diffs = []
for i in range(len(y_test_probs)):
    if i in test_wrong_indices:
        wrong_probs_diffs.append(abs(y_test_preds_np[i]-y_test_probs[i]))
        print(f'WRONG: true label={y_test_true[i]}, prob={y_test_probs[i]},{wrong_probs_diffs[-1]}')
    else:
        correct_probs_diffs.append(abs(y_test_true[i]-y_test_probs[i]))
        print(f'CORRECT: true label={y_test_true[i]}, prob={y_test_probs[i]},{correct_probs_diffs[-1]}')

fig, ax = plt.subplots()
ax.hist(wrong_probs_diffs, color='red', density=True)
ax.set_title("histogram of differences between wrong labels and XGB model probs")
plt.show()

fig, ax = plt.subplots()
ax.hist(correct_probs_diffs, color='green', density=True)
ax.set_title("histogram of differences between correct labels and XGB model probs")
plt.show()

{1: 'buy', 0: 'sell'} 

CORRECT: true label=1, prob=0.5895295143127441,0.41047048568725586
CORRECT: true label=1, prob=0.5297814607620239,0.4702185392379761
CORRECT: true label=1, prob=0.545541524887085,0.45445847511291504
CORRECT: true label=1, prob=0.5556892156600952,0.4443107843399048
CORRECT: true label=1, prob=0.5123186707496643,0.4876813292503357
WRONG: true label=0, prob=0.5582850575447083,0.44171494245529175
WRONG: true label=0, prob=0.5660315752029419,0.4339684247970581
WRONG: true label=0, prob=0.5056567788124084,0.49434322118759155
WRONG: true label=0, prob=0.5227710008621216,0.4772289991378784
WRONG: true label=0, prob=0.5327770709991455,0.4672229290008545
WRONG: true label=0, prob=0.5528644919395447,0.4471355080604553
WRONG: true label=0, prob=0.6500375866889954,0.34996241331100464
WRONG: true label=0, prob=0.5231617093086243,0.47683829069137573
CORRECT: true label=1, prob=0.5124702453613281,0.4875297546386719
WRONG: true label=1, prob=0.4195222854614258,0.4195222854614258

# RL w/ gym-anytrading

In [None]:
import gym
import gym_anytrading
from gym_anytrading.envs.forex_env import ForexEnv

from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK

from stable_baselines import A2C
from stable_baselines.common.vec_env import DummyVecEnv

import quantstats as qs

##### trying out sample code

In [None]:
df = gym_anytrading.datasets.STOCKS_GOOGL.copy()
df = df.drop(['Adj Close'], axis=1)

window_size = 10
start_index = window_size
end_index = len(df)

env_maker = lambda: gym.make(
    'stocks-v0',
    df = df,
    window_size = window_size,
    frame_bound = (start_index, end_index)
)

env = DummyVecEnv([env_maker])

In [None]:
i, end = no_missing_data_idx_range(data_with_ichi_2, early_ending_cols=['chikou_span_visual'])
train_df = data_with_ichi_2.iloc[i:]
train_df.set_index('datetime', inplace=True, verify_integrity=True)
categories_dict = {
    'quarter': [1,2,3,4],
    'day_of_week': [0,1,2,3,4]
}
train_df = dummy_and_remove_data(train_df, categories_dict=categories_dict, cols_to_remove=['momentum_rsi','month','day','minute','hour','year','spread'],
                                 include_defaults=False)
train_df.head()

In [None]:
class CustomForexEnv(ForexEnv):
    def _process_data(self):
        prices = self.df.loc[:, 'Close'].to_numpy()

        prices[self.frame_bound[0] - self.window_size]  # validate index (TODO: Improve validation)
        prices = prices[self.frame_bound[0]-self.window_size:self.frame_bound[1]]

        diff = np.insert(np.diff(prices), 0, 0)
        signal_features = np.column_stack((prices, diff))
        
        my_features = self.df.iloc[:,4:].to_numpy()
        signal_features = np.column_stack((signal_features, my_features))
#         print(list(signal_features[0]))

        return prices, signal_features

In [None]:
df = train_df

window_size = 10
start_index = window_size
end_index = len(df)

env_maker = lambda: CustomForexEnv(
    df = train_df,
    window_size = window_size,
    frame_bound = (start_index, end_index),
    unit_side = 'right'
)

env = DummyVecEnv([env_maker])

In [None]:
policy_kwargs = dict(net_arch=[64, 'lstm', dict(vf=[128, 128, 128], pi=[64, 64])])
model = A2C('MlpLstmPolicy', env, verbose=1, policy_kwargs=policy_kwargs)
model.learn(total_timesteps=5000)

In [None]:
env = env_maker()
observation = env.reset()
actions = []
while True:
    observation = observation[np.newaxis, ...]

    # action = env.action_space.sample()
    action, _states = model.predict(observation)
    actions.append(action)
    observation, reward, done, info = env.step(action)

    # env.render()
    if done:
        print("info:", info)
        break
print(observation)
# for action in actions:
#     print(action)
# print(len(actions))

In [None]:
%matplotlib qt
plt.figure(figsize=(16, 6))
env.render_all()
plt.show()

In [None]:
%matplotlib inline
qs.extend_pandas()

net_worth = pd.Series(env.history['total_profit'], index=df.index[start_index+1:end_index])
returns = net_worth.pct_change().iloc[1:]

qs.reports.full(returns)
qs.reports.html(returns, output='a2c_quantstats.html')

# CNN Bi-LSTM

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [12]:
print(f'is GPU available for TF: {tf.test.is_gpu_available()}\n')

gpu_devices = tf.config.list_physical_devices('GPU')
print(f'GPU devices: {gpu_devices}\n')

all_devices = tf.config.list_physical_devices()
print(f'all devices: {all_devices}')

if len(gpu_devices) > 0:
    for device in gpu_devices: 
        tf.config.experimental.set_memory_growth(device, True)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
is GPU available for TF: True

GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

all devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU')]


#### hyperparameters

In [13]:
seq_len = 128
num_epochs = 400
fast_ma_window = 3
slow_ma_window = 7
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
cur_pair = 'EURUSD'
timeframe = 'H1'
model_config = {
    'current_model':'ichi_cloud',
    'ichi_cloud':{
        'indicators': {
            'ichimoku': {
                'tenkan_period': tenkan_period,
                'kijun_period': kijun_period,
                'chikou_period': kijun_period,
                'senkou_b_period': senkou_b_period
            },
            'rsi': {
                'periods': 14
            }
        }
    }
}
ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',]
#            'trend_ichimoku_base','trend_ichimoku_conv',
#            'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
#                         ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
#                         ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength

train_perc = 0.8
val_perc = (1-train_perc)/2
test_perc = val_perc
split_percents = (val_perc, test_perc)

#### get data and preprocess

In [14]:
def convert_to_tensor(data):
    tensor = tf.convert_to_tensor(data, dtype=tf.float32)
    return tensor

def get_best_batch_size(data_len, min_bs, max_bs):
    best_bs = 0
    best_size = 0
    for i in range(data_len):
        bs = min_bs
        cur_len = data_len - i
        while cur_len % bs != 0 and bs < max_bs:
            bs+=1
        if cur_len % bs == 0:
            best_size = cur_len
            best_bs = bs
            break
    return best_bs, best_size

def get_split_data_ma(preprocessed_data_df, ma_window, seq_len, split_percents=None, ma_cols=None, pc_cols=None, normalization_groups=None, 
                      min_batch_size=1000, max_batch_size=3500, just_train=False, print_info=True, fully_divisible_batch_sizes=False, batch_size=1024,
                      buy_sell_labels_df=None, apply_pct_change=True):
    feature_names = preprocessed_data_df.columns
    if buy_sell_labels_df is not None:
        if buy_sell_labels_df.shape[0] != preprocessed_data_df.shape[0]:
            print(f'buy_sell_labels_df (shape={buy_sell_labels_df.shape}) does not have the same '
                  f'number of rows as preprocessed_data_df (shape={preprocessed_data_df.shape})')
            return
        buy_sell_label_name = buy_sell_labels_df.name
        preprocessed_data_df = pd.concat((preprocessed_data_df, buy_sell_labels_df), axis=1)  
    
    if not ma_cols:
        ma_cols = ['Open','High','Low','Close','Volume']
    
    if not pc_cols:
        pc_cols = ['Open','High','Low','Close','Volume',
                   'trend_ichimoku_base','trend_ichimoku_conv',
                   'trend_ichimoku_a', 'trend_ichimoku_b']
        
    if not normalization_groups:
        normalization_groups = [['Open','High','Low','Close'],  # prices
                                ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
                                ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                                ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                                'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                                'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                                'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength
        
    col_to_idx = {col_name: preprocessed_data_df.columns.get_loc(col_name) for col_name in preprocessed_data_df.columns}

    # apply moving average to data to reduce bias (just learns centre of data) due to rugged raw price data that might look like a random walk to model
    # (but since we are now predicting a mov avg of the price it is less representative of the outliers which are still important)
    if ma_window is not None:
        preprocessed_data_df = apply_moving_avg(preprocessed_data_df, cols=ma_cols, window=ma_window)
        preprocessed_data_df.dropna(how='any', axis=0, inplace=True, subset=feature_names) # drop any NA rows due to applying moving average
    
    # apply percentage change to data to make it so data is more stationary (past data more related to future data) since 
    # the price data is typically strictly increasing or decreasing over the whole distribution
    if apply_pct_change:
        preprocessed_data_df = apply_perc_change(preprocessed_data_df, cols=pc_cols)
        preprocessed_data_df.dropna(how='any', axis=0, inplace=True, subset=feature_names) # drop any NA rows due to applying percentage change
    
    if buy_sell_labels_df is not None:
        buy_sell_labels_df = preprocessed_data_df[buy_sell_label_name]
        preprocessed_data_df = preprocessed_data_df[feature_names]
    
    # normalize data for improved model training performance
    all_train_df, all_train_normalization_terms = normalize_data(preprocessed_data_df, train_data=True, groups=normalization_groups)
    
    # all training data
    all_train = all_train_df.to_numpy()
    
    if buy_sell_labels_df is not None:
        decision_to_int = {'buy': 1, 'sell': 0}
        buy_sell_labels = buy_sell_labels_df.to_numpy()
    
    all_x_train, all_y_train = [], []
    for i in range(seq_len, len(all_train)):
        if buy_sell_labels_df is None: 
            all_x_train.append(all_train[i-seq_len:i])
            all_y_train.append(all_train[i][col_to_idx['Close']])
        else:
            decision = buy_sell_labels[i]
            if decision is not None:
                all_x_train.append(all_train[i-seq_len:i])
                all_y_train.append(decision_to_int[decision])    
    all_x_train, all_y_train = np.array(all_x_train), np.array(all_y_train)
    
    if fully_divisible_batch_sizes:
        final_batch_size, final_train_data_size = get_best_batch_size(len(all_x_train), min_batch_size, max_batch_size)
    else:
        final_batch_size, final_train_data_size = batch_size, len(all_x_train)
        
    all_x_train_len_orig = len(all_x_train)
    all_x_train, all_y_train = all_x_train[-final_train_data_size:], all_y_train[-final_train_data_size:]
    
    # split data
    if not just_train:
        if sum(split_percents) > 1:
            print(f'sum of split_percents {split_percents} should not exceed 1')
            return

        len_data = preprocessed_data_df.shape[0]
        # only need to pass validation and test split percentages and the rest will be used as training data
        val_p, test_p = split_percents
        # only need to define num rows for validation and test data split so that they remain constant w/ respect to the size of preprocessed_data_df
        # so that when plotting data of diffrernt moving averages they line up consistatnly
        val_len, test_len = int(len_data*val_p), int(len_data*test_p)

        test_data_df = preprocessed_data_df.iloc[-test_len:] 
        val_data_df = preprocessed_data_df.iloc[-(val_len+test_len):-test_len]
        train_data_df = preprocessed_data_df.iloc[:-(val_len+test_len)]
        
        if buy_sell_labels_df is not None:
            test_buy_sell_labels = buy_sell_labels_df.iloc[-test_len:].to_numpy() 
            val_buy_sell_labels = buy_sell_labels_df.iloc[-(val_len+test_len):-test_len].to_numpy()
            train_buy_sell_labels = buy_sell_labels_df.iloc[:-(val_len+test_len)].to_numpy()
        
        train_data_df, normalization_terms = normalize_data(train_data_df, train_data=True, groups=normalization_groups)   
        val_data_df = normalize_data(val_data_df, train_data=False, normalization_terms=normalization_terms)[0]
        test_data_df = normalize_data(test_data_df, train_data=False, normalization_terms=normalization_terms)[0]
        
        train_data = train_data_df.to_numpy()
        val_data = val_data_df.to_numpy()
        test_data = test_data_df.to_numpy()
        
        # training data
        x_train, y_train = [], []  
        for i in range(seq_len, len(train_data)):
            if buy_sell_labels_df is None: 
                x_train.append(train_data[i-seq_len:i])
                y_train.append(train_data[i][col_to_idx['Close']])
            else:
                decision = train_buy_sell_labels[i]
                if decision is not None:
                    x_train.append(train_data[i-seq_len:i])
                    y_train.append(decision_to_int[decision])
        x_train, y_train = np.array(x_train), np.array(y_train)

        # validation data
        x_val, y_val = [], []
        for i in range(seq_len, len(val_data)):
            if buy_sell_labels_df is None: 
                x_val.append(val_data[i-seq_len:i])
                y_val.append(val_data[i][col_to_idx['Close']])
            else:
                decision = val_buy_sell_labels[i]
                if decision is not None:
                    x_val.append(val_data[i-seq_len:i])
                    y_val.append(decision_to_int[decision])
        x_val, y_val = np.array(x_val), np.array(y_val)

        # test data
        x_test, y_test = [], []
        for i in range(seq_len, len(test_data)):
            if buy_sell_labels_df is None: 
                x_test.append(test_data[i-seq_len:i])
                y_test.append(test_data[i][col_to_idx['Close']])
            else:
                decision = test_buy_sell_labels[i]
                if decision is not None:
                    x_test.append(test_data[i-seq_len:i])
                    y_test.append(decision_to_int[decision])
        x_test, y_test = np.array(x_test), np.array(y_test)
        
        if fully_divisible_batch_sizes:
            eval_batch_size, eval_train_data_size = get_best_batch_size(len(x_train), min_batch_size, max_batch_size)
        else:
            eval_batch_size, eval_train_data_size = batch_size, len(x_train)

        x_train_len_orig = len(x_train)
        x_train, y_train = x_train[-eval_train_data_size:], y_train[-eval_train_data_size:]
        
        if print_info:
            print('------------------------------------------------------')
            print(f'data w/ moving average window of {ma_window} info:\n')
            print(f'batch size for evaluation: {eval_batch_size}')
            print(f'training data size reduction for evaulation: {x_train_len_orig} -> {eval_train_data_size}')
            print(f'batch size for final training: {final_batch_size}')
            print(f'training data size reduction for final training: {all_x_train_len_orig} -> {final_train_data_size}\n')
            print(f'training data shape: x={x_train.shape}, y={y_train.shape}')
            print(f'validation data shape: x={x_val.shape}, y={y_val.shape}')
            print(f'test data shape: x={x_test.shape}, y={y_test.shape}')
            print(f'all train data shape: x={all_x_train.shape}, y={all_y_train.shape}')
            print('------------------------------------------------------')

        data_dict = {
            'ma_window': ma_window,
            'eval_batch_size': eval_batch_size,
            'final_batch_size': final_batch_size,
            'train_data_df': train_data_df,
            'val_data_df': val_data_df,
            'test_data_df': test_data_df,
            'all_train_df': all_train_df,
            'train_data_np': (x_train, y_train),
            'val_data_np': (x_val, y_val),
            'test_data_np': (x_test, y_test),
            'all_train_data_np': (all_x_train, all_y_train),
            'all_train_normalization_terms': all_train_normalization_terms
        }
    else:
        if print_info:
            print('------------------------------------------------------')
            print(f'data w/ moving average window of {ma_window} info:\n')
            print(f'batch size for final training: {final_batch_size}')
            print(f'training data size reduction for final training: {all_x_train_len_orig} -> {final_train_data_size}\n')
            print(f'all train data shape: x={all_x_train.shape}, y={all_y_train.shape}')
            print('------------------------------------------------------')

        data_dict = {
            'ma_window': ma_window,
            'final_batch_size': final_batch_size,
            'all_train_df': all_train_df,
            'all_train_data_np': (all_x_train, all_y_train),
            'all_train_normalization_terms': all_train_normalization_terms
        }
    return data_dict

In [None]:
tick_data_filepath = gi.download_mt5_data(cur_pair, timeframe, '2011-01-01', '2020-10-01')
data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath,
                                                save_to_disk=True, 
                                                config=model_config, 
                                                has_headers=True,
                                                datetime_col='datetime')
data_with_ichi_sigs = add_features(data_with_indicators)

all_data = missing_labels_preprocess(data_with_ichi_sigs,None,None)[0]
all_data_orig = all_data

In [None]:
fast_ma_data = get_split_data_ma(all_data, ma_window=fast_ma_window, seq_len=seq_len, split_percents=split_percents, fully_divisible_batch_sizes=True,
                                 normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000)
slow_ma_data = get_split_data_ma(all_data, ma_window=slow_ma_window, seq_len=seq_len, split_percents=split_percents, fully_divisible_batch_sizes=True,
                                 normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000)

x_train_fast_ma, y_train_fast_ma = fast_ma_data['train_data_np']
x_val_fast_ma, y_val_fast_ma = fast_ma_data['val_data_np']
x_test_fast_ma, y_test_fast_ma = fast_ma_data['test_data_np']

x_train_slow_ma, y_train_slow_ma = slow_ma_data['train_data_np']
x_val_slow_ma, y_val_slow_ma = slow_ma_data['val_data_np']
x_test_slow_ma, y_test_slow_ma = slow_ma_data['test_data_np']

# process orignal price data for plotting comparison

all_data_orig = apply_perc_change(all_data_orig, cols=pc_cols)
all_data_orig.dropna(how='any', axis=0, inplace=True) # drop any NA rows due to applying percentage change

train_data_df_orig = all_data_orig.iloc[:fast_ma_data['train_data_df'].index[-1]+1]
val_data_df_orig = all_data_orig.iloc[fast_ma_data['train_data_df'].index[-1]+1:fast_ma_data['val_data_df'].index[-1]+1]
test_data_df_orig = all_data_orig.iloc[fast_ma_data['val_data_df'].index[-1]+1:]

train_data_df_orig, normalization_terms_2 = normalize_data(train_data_df_orig, train_data=True, groups=normalization_groups)   
val_data_df_orig, normalization_terms_2 = normalize_data(val_data_df_orig, train_data=False, normalization_terms=normalization_terms_2)
test_data_df_orig = normalize_data(test_data_df_orig, train_data=False, normalization_terms=normalization_terms_2)[0]

train_data_orig = train_data_df_orig.to_numpy()
val_data_orig = val_data_df_orig.to_numpy()
test_data_orig = test_data_df_orig.to_numpy()

In [None]:
for tup in ((fast_ma_data, 'Fast MA'), (slow_ma_data, 'Slow MA')):
    data, marker = tup
    
    train_data_df = data['train_data_df']
    val_data_df = data['val_data_df']
    test_data_df = data['test_data_df']

    train_data = train_data_df.to_numpy()
    val_data = val_data_df.to_numpy()
    test_data = test_data_df.to_numpy()

    fig = plt.figure(figsize=(15,10))
    st = fig.suptitle(f'{marker} Data Separation', fontsize=20)
    st.set_y(0.92)

    ###############################################################################

    ax1 = fig.add_subplot(211)
    ax1.plot(np.arange(train_data.shape[0]), train_data_df['Close'], label='Training data')

    ax1.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), val_data_df['Close'], label='Validation data')

    ax1.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), test_data_df['Close'], label='Test data')
    ax1.set_xlabel('Date')
    ax1.set_ylabel(f'{marker} Normalized Closing Returns')

    ###############################################################################

    ax2 = fig.add_subplot(212)
    ax2.plot(np.arange(train_data.shape[0]), train_data_df['Volume'], label='Training data')

    ax2.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), val_data_df['Volume'], label='Validation data')

    ax2.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), test_data_df['Volume'], label='Test data')
    ax2.set_xlabel('Date')
    ax2.set_ylabel(f'{marker} Normalized Volume Changes')

    plt.legend(loc='best')

#### define model

In [None]:
def Inception_A(layer_in, c7):
    branch1x1_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch1x1 = layers.BatchNormalization()(branch1x1_1)
    branch1x1 = layers.ReLU()(branch1x1)

    branch5x5_1 = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(layer_in)
    branch5x5 = layers.BatchNormalization()(branch5x5_1)
    branch5x5 = layers.ReLU()(branch5x5)
    branch5x5 = layers.Conv1D(c7, kernel_size=5, padding='same', use_bias=False)(branch5x5)
    branch5x5 = layers.BatchNormalization()(branch5x5)
    branch5x5 = layers.ReLU()(branch5x5)  

    branch3x3_1 = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(layer_in)
    branch3x3 = layers.BatchNormalization()(branch3x3_1)
    branch3x3 = layers.ReLU()(branch3x3)
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding='same', use_bias=False)(branch3x3)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3)
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding='same', use_bias=False)(branch3x3)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3) 

    branch_pool = layers.AveragePooling1D(pool_size=(3), strides=1, padding='same')(layer_in)
    branch_pool = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(branch_pool)
    branch_pool = layers.BatchNormalization()(branch_pool)
    branch_pool = layers.ReLU()(branch_pool)
    outputs = layers.Concatenate(axis=-1)([branch1x1, branch5x5, branch3x3, branch_pool])
    return outputs


def Inception_B(layer_in, c7):
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding="same", strides=2, use_bias=False)(layer_in)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3)  

    branch3x3dbl = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)  
    branch3x3dbl = layers.Conv1D(c7, kernel_size=3, padding="same", use_bias=False)(branch3x3dbl)  
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)  
    branch3x3dbl = layers.Conv1D(c7, kernel_size=3, padding="same", strides=2, use_bias=False)(branch3x3dbl)    
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)   

    branch_pool = layers.MaxPooling1D(pool_size=3, strides=2, padding="same")(layer_in)

    outputs = layers.Concatenate(axis=-1)([branch3x3, branch3x3dbl, branch_pool])
    return outputs


def Inception_C(layer_in, c7):
    branch1x1_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch1x1 = layers.BatchNormalization()(branch1x1_1)
    branch1x1 = layers.ReLU()(branch1x1)   

    branch7x7_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch7x7 = layers.BatchNormalization()(branch7x7_1)
    branch7x7 = layers.ReLU()(branch7x7)   
    branch7x7 = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7)
    branch7x7 = layers.BatchNormalization()(branch7x7)
    branch7x7 = layers.ReLU()(branch7x7)  
    branch7x7 = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7)  
    branch7x7 = layers.BatchNormalization()(branch7x7)
    branch7x7 = layers.ReLU()(branch7x7)   

    branch7x7dbl_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl_1)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl) 
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  

    branch_pool = layers.AveragePooling1D(pool_size=3, strides=1, padding='same')(layer_in)
    branch_pool = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(branch_pool)
    branch_pool = layers.BatchNormalization()(branch_pool)
    branch_pool = layers.ReLU()(branch_pool)  

    outputs = layers.Concatenate(axis=-1)([branch1x1, branch7x7, branch7x7dbl, branch_pool])
    return outputs


def create_model(seq_len, num_features):
    in_seq = layers.Input(shape=(seq_len, num_features))

    x = Inception_A(in_seq, 32)
    x = Inception_A(x, 32)
    x = Inception_B(x, 32)
    x = Inception_B(x, 32)
    x = Inception_C(x, 32)
    x = Inception_C(x, 32)    

    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

    avg_pool = layers.GlobalAveragePooling1D()(x)
    max_pool = layers.GlobalMaxPooling1D()(x)
    conc = layers.concatenate([avg_pool, max_pool])
    conc = layers.Dense(64, activation="relu")(conc)
    out = layers.Dense(1, activation="sigmoid")(conc)      

    model = keras.Model(inputs=in_seq, outputs=out)
    model.compile(loss="mse", optimizer="adam", metrics=['mae', 'mape'])     
    return model

# def create_model(seq_len, num_features):
#     in_seq = layers.Input(shape = (seq_len, num_features))

#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(in_seq)
#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

#     avg_pool = layers.GlobalAveragePooling1D()(x)
#     max_pool = layers.GlobalMaxPooling1D()(x)
#     conc = layers.concatenate([avg_pool, max_pool])
#     conc = layers.Dense(64, activation="relu")(conc)
#     out = layers.Dense(1, activation="linear")(conc)      

#     model = keras.Model(inputs=in_seq, outputs=out)
#     model.compile(loss="mse", optimizer="adam", metrics=['mae', 'mape'])    
#     return model

def create_model_binary(seq_len, num_features):
#     in_seq = layers.Input(shape=(seq_len, num_features))

#     x = Inception_A(in_seq, 32)
#     x = Inception_A(x, 32)
#     x = Inception_B(x, 32)
#     x = Inception_B(x, 32)
#     x = Inception_C(x, 32)
#     x = Inception_C(x, 32)    

#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

#     avg_pool = layers.GlobalAveragePooling1D()(x)
#     max_pool = layers.GlobalMaxPooling1D()(x)
#     conc = layers.concatenate([avg_pool, max_pool])
#     conc = layers.Dense(64, activation="relu")(conc)
#     out = layers.Dense(1, activation="sigmoid")(conc)      

#     model = keras.Model(inputs=in_seq, outputs=out)
#     model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy', 'AUC']) 

    model = keras.Sequential()
    model.add(layers.Input(shape=(seq_len, num_features)))
    model.add(layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.LSTM(100))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

#### try using model as buy/sell classifier 

In [None]:
# hyperparameters

label_non_signals = False
min_profit_percent, profit_noise_percent = 0.01, 0.01
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
in_quote_currency = True
pip_resolution = 0.0001

labels_dict = {1: 'buy', 0: 'sell'}
n_estimators = 3000
max_depth = 2
learning_rate = 0.1
subsample = 1
colsample_bytree = 1
gamma = 0
num_class = 3 # buy, sell, wait
tenkan_period = 8
kijun_period = 22
senkou_b_period = 44
model_config = {
    'current_model':'ichi_cloud',
    'ichi_cloud':{
        'indicators': {
            'ichimoku': {
                'tenkan_period': tenkan_period,
                'kijun_period': kijun_period,
                'chikou_period': kijun_period,
                'senkou_b_period': senkou_b_period
            },
            'rsi': {
                'periods': 14
            }
        }
    }
}
signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
sigs_for_filename = ['cb-tk-tkp-sen-chi']

# get data

cur_pair = 'EURUSD'
timeframe = 'H1'
tick_data_filepath = gi.download_mt5_data(cur_pair, timeframe, '2011-01-01', '2020-10-01')
data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                save_to_disk=True, 
                                                config=model_config, 
                                                has_headers=True,
                                                datetime_col='datetime')
train_data = add_features(data_with_indicators)

train_data_labels = generate_ichimoku_labels(train_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

start_idx, end_idx = no_missing_data_idx_range(train_data, early_ending_cols=['chikou_span_visual'])
train_data = train_data.iloc[start_idx:end_idx+1]
train_data = dummy_and_remove_features(train_data)
train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]

In [None]:
data_dict = get_split_data_ma(train_data, ma_window=None, seq_len=128, split_percents=split_percents, fully_divisible_batch_sizes=True,
                              normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000,
                              buy_sell_labels_df=train_data_labels['first_decision'], apply_pct_change=False)

x_train, y_train = data_dict['train_data_np']
x_val, y_val = data_dict['val_data_np']
x_test, y_test = data_dict['test_data_np']

binary_model = create_model_binary(seq_len=x_train.shape[1], num_features=x_train.shape[2])
    
filepath = f'../my_stuff/test_model.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', save_best_only=True, verbose=1)
                                    
binary_model.fit(convert_to_tensor(x_train), convert_to_tensor(y_train),
                  batch_size=data_dict['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  validation_data=(convert_to_tensor(x_val), convert_to_tensor(y_val)))

#### try using model for close price forcasting

In [None]:
fast_ma_window = fast_ma_data['ma_window']
filepath = f'../my_stuff/{cur_pair}-{timeframe}_Bi-LSTM_{fast_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, verbose=1)

fast_ma_model = create_model(seq_len=x_train_fast_ma.shape[1], num_features=x_train_fast_ma.shape[2])
# print(fast_ma_model.summary())

start_t = time.time()

fast_ma_model.fit(convert_to_tensor(x_train_fast_ma), convert_to_tensor(y_train_fast_ma),
                  batch_size=fast_ma_data['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  validation_data=(convert_to_tensor(x_val_fast_ma), convert_to_tensor(y_val_fast_ma)))

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
slow_ma_window = slow_ma_data['ma_window']
filepath = f'../my_stuff/{cur_pair}-{timeframe}_Bi-LSTM_{slow_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, verbose=1)

slow_ma_model = create_model(seq_len=x_train_slow_ma.shape[1], num_features=x_train_slow_ma.shape[2])

start_t = time.time()

slow_ma_model.fit(convert_to_tensor(x_train_slow_ma), convert_to_tensor(y_train_slow_ma),
                  batch_size=slow_ma_data['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  #shuffle=True,
                  validation_data=(convert_to_tensor(x_val_slow_ma), convert_to_tensor(y_val_slow_ma)))

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
fast_ma_model = tf.keras.models.load_model('../my_stuff/final_Bi-LSTM_fast_5_ma.hdf5')#('../my_stuff/Bi-LSTM_mov_avg_5.hdf5')

print('done loading fast ma model')

# #Calculate predication for training, validation and test data
# train_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_train_fast_ma))
# val_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_val_fast_ma))
# test_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_test_fast_ma))

#Print evaluation metrics for all datasets
train_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_train_fast_ma), convert_to_tensor(y_train_fast_ma), verbose=0)
val_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_val_fast_ma), convert_to_tensor(y_val_fast_ma), verbose=0)
test_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_test_fast_ma), convert_to_tensor(y_test_fast_ma), verbose=0)

print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval_fast_ma[0], train_eval_fast_ma[1], train_eval_fast_ma[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval_fast_ma[0], val_eval_fast_ma[1], val_eval_fast_ma[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval_fast_ma[0], test_eval_fast_ma[1], test_eval_fast_ma[2]))

In [None]:
slow_ma_model = tf.keras.models.load_model('../my_stuff/final_Bi-LSTM_slow_13_ma.hdf5')#('../my_stuff/Bi-LSTM_mov_avg_13.hdf5')

print('done loading slow ma model')

#Calculate predication for training, validation and test data
train_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_train_slow_ma))
val_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_val_slow_ma))
test_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_test_slow_ma))

#Print evaluation metrics for all datasets
train_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_train_slow_ma), convert_to_tensor(y_train_slow_ma), verbose=0)
val_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_val_slow_ma), convert_to_tensor(y_val_slow_ma), verbose=0)
test_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_test_slow_ma), convert_to_tensor(y_test_slow_ma), verbose=0)

print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval_slow_ma[0], train_eval_slow_ma[1], train_eval_slow_ma[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval_slow_ma[0], val_eval_slow_ma[1], val_eval_slow_ma[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval_slow_ma[0], test_eval_slow_ma[1], test_eval_slow_ma[2]))

In [None]:
test_data_fast_ma = fast_ma_data['test_data_df'].to_numpy()

test_data_slow_ma = slow_ma_data['test_data_df'].to_numpy()

fig = plt.figure(figsize=(15,15))
st = fig.suptitle("CNN + Bi-LSTM Model", fontsize=22)
st.set_y(1.02)

# #Plot training data results
# ax11 = fig.add_subplot(311)
# ax11.plot(train_data[seq_len:, 3], label='EURUSD Closing Returns')
# ax11.plot(train_pred, color='yellow', linewidth=3, label='Predicted EURUSD Closing Returns')
# ax11.set_title("Training Data", fontsize=18)
# ax11.set_xlabel('Date')
# ax11.set_ylabel('EURUSD Closing Returns')

# #Plot validation data results
# ax21 = fig.add_subplot(312)
# ax21.plot(val_data[seq_len:, 3], label='EURUSD Closing Returns')
# ax21.plot(val_pred, color='yellow', linewidth=3, label='Predicted EURUSD Closing Returns')
# ax21.set_title("Validation Data", fontsize=18)
# ax21.set_xlabel('Date')
# ax21.set_ylabel('EURUSD Closing Returns')

#Plot test data results
ax31 = fig.add_subplot(111)
ax31.plot(test_data_fast_ma[seq_len:, 3], label='EURUSD closing mov avg 5')
ax31.plot(test_pred_fast_ma, linewidth=3, label='Predicted EURUSD closing mov avg 5')
ax31.plot(test_data_slow_ma[seq_len:, 3], label='EURUSD closing mov avg 13')
ax31.plot(test_pred_slow_ma, linewidth=3, label='Predicted EURUSD closing mov avg 13')
ax31.plot(test_data_orig[:, 3], label='Original EURUSD Closing Returns')
ax31.set_title("Test Data", fontsize=18)
ax31.set_xlabel('Date')
ax31.set_ylabel('EURUSD Closing Returns')

plt.style.use('seaborn')
plt.tight_layout()
plt.legend(loc='best')
plt.show()

#### train models for backtesting

In [None]:
fast_ma_window = fast_ma_data['ma_window']
filepath = f'../my_stuff/final_{cur_pair}-{timeframe}_Bi-LSTM_{fast_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss', save_best_only=True, verbose=1)

all_x_train_fast_ma, all_y_train_fast_ma = fast_ma_data['all_train_data_np']
fast_ma_model = create_model(seq_len=all_x_train_fast_ma.shape[1], num_features=all_x_train_fast_ma.shape[2])

start_t = time.time()

fast_ma_model.fit(conc8vert_to_tensor(all_x_train_fast_ma), convert_to_tensor(all_y_train_fast_ma),
                  batch_size=fast_ma_data['final_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs)

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
slow_ma_window = slow_ma_data['ma_window']
filepath = f'../my_stuff/final_{cur_pair}-{timeframe}_Bi-LSTM_{slow_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss', save_best_only=True, verbose=1)

all_x_train_slow_ma, all_y_train_slow_ma = slow_ma_data['all_train_data_np']
slow_ma_model = create_model(seq_len=all_x_train_slow_ma.shape[1], num_features=all_x_train_slow_ma.shape[2])

start_t = time.time()

slow_ma_model.fit(convert_to_tensor(all_x_train_slow_ma), convert_to_tensor(all_y_train_slow_ma),
                  batch_size=slow_ma_data['final_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs)

print(f'training time = {(time.time()-start_t)/60} min')

# backtest models (xgboost for opening and CNN+Bi-LSTM for closing)

#### prepare CNN+Bi-LSTM models and preprocessing vars

In [15]:
fast_ma_model = tf.keras.models.load_model('../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5')
# slow_ma_model = tf.keras.models.load_model('../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5')

fast_ma_window = 7
# slow_ma_window = 7
lstm_seq_len = 128

print(f'fast MA window: {fast_ma_window}')
# print(f'slow MA window: {slow_ma_window}')
print(f'sequence length for LSTMs: {lstm_seq_len}')

fast MA window: 7
sequence length for LSTMs: 128


#### prepare xgboost models and preprocessing vars

In [37]:
xgb_decision_predictor = xgb.Booster()
xgb_decision_predictor.load_model('../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json')
xgb_labels_dict = {1: 'buy', 0: 'sell'}
open_trade_sigs = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                   'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                   'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                   'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                   'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross

print(f'labels dict for XGB classifier:\n\t{xgb_labels_dict}')
print('signals to consider for opening trades:')
for sig in open_trade_sigs:
    print(f'\t{sig}')

labels dict for XGB classifier:
	{1: 'buy', 0: 'sell'}
signals to consider for opening trades:
	cloud_breakout_bull
	cloud_breakout_bear
	tk_cross_bull_strength
	tk_cross_bear_strength
	tk_price_cross_bull_strength
	tk_price_cross_bear_strength
	senkou_cross_bull_strength
	senkou_cross_bear_strength
	chikou_cross_bull_strength
	chikou_cross_bear_strength


#### global hyperparameters for backtest

In [45]:
# all parameters SHOULD match what the models were trained on for best results (so far this assumption is consistant)

# independant params
min_profit_percent, profit_noise_percent = 0.002, 0.002
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
starting_balance = 1000
leverage = 500    # 1:leverage
max_concurrent_trades = np.inf
currency_side = 'right'
in_quote_currency = True if currency_side == 'right' else False
pip_resolution = 0.0001
stop_out_pct = 0.2  # explaination: https://www.tradersway.com/new_to_the_market/forex_and_cfd_basics#margin
fast_ma_diff_thresh = 0.039  #0.02
# slow_ma_diff_thresh = 0.05   #0.02
# fast_ma_diff_thresh_profit = 0.03  #0.02
# slow_ma_diff_thresh_profit = 0.03   #0.02
# fast_ma_diff_thresh_loss = 0.03 
# slow_ma_diff_thresh_loss = 0.03   
decision_prob_diff_thresh = 0.46   # 0.5 accepts all probabilities
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
label_non_signals=False
hedged_margin = 50_000
tradersway_commodity = False

# dependant params (don't edit)
pip_value = contract_size * lots_per_trade * pip_resolution   # in quote currency (right side currency of currency pair)
min_profit = min_profit_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on
profit_noise = profit_noise_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on

#### prepare data for backtest

In [46]:
ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
                        ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
                        ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength

model_config = {
    'current_model':'ichi_cloud',
    'ichi_cloud':{
        'indicators': {
            'ichimoku': {
                'tenkan_period': tenkan_period,
                'kijun_period': kijun_period,
                'chikou_period': kijun_period,
                'senkou_b_period': senkou_b_period
            },
            'rsi': {
                'periods': 14
            }
        }
    }
}

# tick_data_filepath = gi.download_mt5_data("EURUSD", 'H1', '2020-10-02', '2021-01-05')
tick_data_filepath = gi.download_mt5_data("EURUSD", 'H1', '2020-11-02', '2021-01-05')
# tick_data_filepath = gi.download_mt5_data("EURUSD", 'H1', '2020-10-02', '2020-12-18')
data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                save_to_disk=True, 
                                                config=model_config, 
                                                has_headers=True,
                                                datetime_col='datetime')

test_data_with_ichi_sigs = add_features(data_with_indicators)
model_data = dummy_and_remove_features(test_data_with_ichi_sigs)

start, stop = no_missing_data_idx_range(model_data)

model_data = model_data.iloc[start:stop+1]
model_data_np = model_data.to_numpy()

test_data_with_ichi_sigs = test_data_with_ichi_sigs.iloc[start:stop+1]
test_data_np = test_data_with_ichi_sigs.to_numpy()

ma_cols_set = set([model_data.columns.get_loc(col_name) for col_name in ma_cols])
pc_cols_set = set([model_data.columns.get_loc(col_name) for col_name in pc_cols])

feature_indices = {test_data_with_ichi_sigs.columns[i]: i for i in range(len(test_data_with_ichi_sigs.columns))}

fast_ma_data = get_split_data_ma(model_data, ma_window=fast_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
                                  normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
                                  max_batch_size=2000, just_train=True, print_info=False)
# slow_ma_data = get_split_data_ma(model_data, ma_window=slow_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
#                                   normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
#                                   max_batch_size=2000, just_train=True, print_info=False)

fast_ma_norm_terms = fast_ma_data['all_train_normalization_terms']
# slow_ma_norm_terms = slow_ma_data['all_train_normalization_terms']

test_data_labels = generate_ichimoku_labels(test_data_with_ichi_sigs, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=open_trade_sigs, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency, pip_resolution=pip_resolution)

loaded 1053 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2020-11-02T00;00UTC_to_2021-01-05T00;00UTC.csv
saved 1053 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2020-11-02T00;00UTC_to_2021-01-05T00;00UTC.csv, done.


#### analyze test data to develop trading strategy

In [None]:
# plot move avg cnn+lstm preds vs price data

fast_ma_preds = fast_ma_model.predict(convert_to_tensor(fast_ma_data['all_train_data_np'][0]))
fast_ma_preds = np.reshape(fast_ma_preds,(fast_ma_preds.shape[0],))
fast_ma_preds = fast_ma_preds.tolist()
fill = [None]*(len(test_data_with_ichi_sigs) - len(fast_ma_preds))
fill.extend(fast_ma_preds)
fast_ma_preds = fill

# slow_ma_preds = slow_ma_model.predict(convert_to_tensor(slow_ma_data['all_train_data_np'][0]))
# slow_ma_preds = np.reshape(slow_ma_preds,(slow_ma_preds.shape[0],))
# slow_ma_preds = slow_ma_preds.tolist()
# fill = [None]*(len(test_data_with_ichi_sigs) - len(slow_ma_preds))
# fill.extend(slow_ma_preds)
# slow_ma_preds = fill

lstm_preds = pd.DataFrame({
    'fast_ma':fast_ma_preds, 
#     'slow_ma':slow_ma_preds
})

# import random
# test_data = [random.random() for i in range(len(test_data_with_ichi_sigs))]
# test_data2 = [random.random() for i in range(len(test_data_with_ichi_sigs))]
# test_data = {'testing1': test_data,
#              'testing2': test_data2}
# test_data=pd.DataFrame(test_data)
# show_data_from_range(test_data_with_ichi_sigs, '2020-10-12', '2020-10-16', 
#                      main_indicator='ichimoku', sub_indicators=[test_data,'rsi'], visualize_crosses=True,
#                      visualize_labels=True, labels_df=test_data_labels)

labels = ['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
          'second_decision','ticks_till_best_profit_second_decision', 'best_profit_second_decision', 'profit_peak_second_decision']
show_data_from_range(test_data_with_ichi_sigs, '2020-11-12', '2020-12-17', 
                     main_indicator='ichimoku', sub_indicators=[lstm_preds], visualize_crosses=True,
                     visualize_labels=True, labels_df=test_data_labels, labels=labels)

#### backtest strat

In [47]:
trades = {}
backtest_trades = {}   # closed trades results
pending_order = None
pending_close = None
decisions_so_far = []
fast_ma_seq_buf = deque()
slow_ma_seq_buf = deque()
fast_ma_window_buf = deque()
slow_ma_window_buf = deque()
fast_ma_avgs = []
slow_ma_avgs = []
fast_ma_perc_chngs = []
slow_ma_perc_chngs = []
xgb_model_perc_chngs = []
fast_ma_preds = []
slow_ma_preds = []
cnn_lstm_pred_times = []
xgb_pred_times = []
free_margins = []
margins = []
margin_levels = []
equities = []
balances = []
open_trades_counts = []
losing_trades_counts = []
pct_done = 0
buffers_rdy_idx = None
balance = starting_balance
equity = starting_balance
free_margin = starting_balance
losing_trades = 0
margin_level = None
margin = None
final_dt = None
stop = False

start_time = time.time()
for i in range(len(test_data_np)):
    """
    fill data buffers for models
    """
    
    # for xgb model
    
    if i > 0:
        row = apply_perc_change_list(model_data_np[i-1], model_data_np[i], cols_set=pc_cols_set)
        xgb_model_perc_chngs.append(row)
    
    # for fast MA model
    
    fast_ma_window_buf.append(model_data_np[i])
    if len(fast_ma_window_buf) > fast_ma_window:
        fast_ma_window_buf.popleft()
    
    if len(fast_ma_window_buf) == fast_ma_window:
        row = apply_moving_avg_q(fast_ma_window_buf, ma_cols_set)
        fast_ma_avgs.append(row)
    
    if len(fast_ma_avgs) >= 2:
        row = apply_perc_change_list(fast_ma_avgs[-2], fast_ma_avgs[-1], pc_cols_set)
        row = normalize_data_list(row, fast_ma_norm_terms)
        fast_ma_perc_chngs.append(row) 
    
    if len(fast_ma_perc_chngs) > 0:
        fast_ma_seq_buf.append(fast_ma_perc_chngs[-1])
    
    if len(fast_ma_seq_buf) > lstm_seq_len:
        fast_ma_seq_buf.popleft()
    
    # for slow MA model
    
#     slow_ma_window_buf.append(model_data_np[i])
#     if len(slow_ma_window_buf) > slow_ma_window:
#         slow_ma_window_buf.popleft()
    
#     if len(slow_ma_window_buf) == slow_ma_window:
#         row = apply_moving_avg_q(slow_ma_window_buf, ma_cols_set)
#         slow_ma_avgs.append(row)
    
#     if len(slow_ma_avgs) >= 2:
#         row = apply_perc_change_list(slow_ma_avgs[-2], slow_ma_avgs[-1], pc_cols_set)
#         row = normalize_data_list(row, slow_ma_norm_terms)
#         slow_ma_perc_chngs.append(row)  
        
#     if len(slow_ma_perc_chngs) > 0:
#         slow_ma_seq_buf.append(slow_ma_perc_chngs[-1])
    
#     if len(slow_ma_seq_buf) > lstm_seq_len:
#         slow_ma_seq_buf.popleft()
    
    # now check if LSTMs have enough data to being trade simulation
    
#     if len(fast_ma_seq_buf) == lstm_seq_len and len(slow_ma_seq_buf) == lstm_seq_len:
    if len(fast_ma_seq_buf) == lstm_seq_len:
        """
        simulate trading
        """
        
        if buffers_rdy_idx is None:
            buffers_rdy_idx = i
            print('model buffers full, beginning trade sim...')

        # look for ichiomku signals
        causes = []
        for sig in open_trade_sigs:
            sig_i = feature_indices[sig]
            if test_data_np[i][sig_i] != 0:
                causes.append(sig)

        start = time.time()
        fast_ma_pred = fast_ma_model.predict(np.array([fast_ma_seq_buf]))
        slow_ma_pred = [[0]] #slow_ma_model.predict(np.array([slow_ma_seq_buf]))
        duration = time.time() - start
        cnn_lstm_pred_times.append(duration)

        fast_ma_preds.append(fast_ma_pred[0][0])
        slow_ma_preds.append(slow_ma_pred[0][0])
    
        if len(fast_ma_preds) > 1:
            fast_ma_diff = fast_ma_preds[-1] - fast_ma_preds[-2]    # remember this is the diff in the pct_change of the mov avg
            slow_ma_diff = slow_ma_preds[-1] - slow_ma_preds[-2]
        else:
            fast_ma_diff = 0
            slow_ma_diff = 0

        if pending_order is not None:
            pending_order_i, decision_label, decision_prob, order_causes, sig_fast_ma_diff, sig_slow_ma_diff  = pending_order
            open_price = test_data_np[i][feature_indices['Open']]
            decision_prob_diff = abs(decision_label-decision_prob)
            
            trades[pending_order_i] = {
                'decision_label': decision_label,
                'decision_prob': decision_prob,
                'causes': order_causes,
                'open_price': open_price,
                'trade_open_tick_i': i,
                'profit': None,
                'best_profit': None,
                'ticks_till_close': None,
                'close_idx': None,
                'lots': lots_per_trade,
                'look_to_close': False,
                'forced_close': False,
                'fast_ma_diff_at_sig': sig_fast_ma_diff,
                'slow_ma_diff_at_sig': sig_slow_ma_diff,
                'fast_ma_diff_at_close': None,
                'slow_ma_diff_at_close': None,
                'fast_ma_diff_at_best_sign_to_close': None,
                'slow_ma_diff_at_best_sign_to_close': None
            }
            
            required_margin = get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                         tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)
            
            # reference on opening trades and margin level https://www.luckscout.com/leverage-margin-balance-equity-free-margin-and-margin-level-in-forex-trading/
            if required_margin > free_margin or (margin_level is not None and margin_level <= 100) \
                    or len(trades) > max_concurrent_trades or decision_prob_diff > decision_prob_diff_thresh:
                del trades[pending_order_i]
            else:
                margin = required_margin
            
            pending_order = None
        
        # update equity and free margin based on currently opened trades
        losing_trades = 0
        for trade_i in trades:
            trade = trades[trade_i]
            close_price = test_data_np[i][feature_indices['Close']]
            trade_decision = xgb_labels_dict[trade['decision_label']]

            profit = get_profit(close_price, trade['open_price'], pip_value=pip_value, pip_resolution=pip_resolution, in_quote_currency=in_quote_currency)
            if trade_decision == 'sell':
                profit *= - 1
            
            if trade['profit'] is None:
                profit_delta = profit
            else:
                profit_delta = profit - trade['profit']
            trade['profit'] = profit
            
            if profit < 0:
                losing_trades += 1
            
            if trade['best_profit'] is None or profit > trade['best_profit']:
                trade['best_profit'] = profit
                if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):
                    trade['fast_ma_diff_at_best_sign_to_close'] = fast_ma_diff
                if (slow_ma_diff < 0 and trade_decision == 'buy') or (slow_ma_diff > 0 and trade_decision == 'sell'):
                    trade['slow_ma_diff_at_best_sign_to_close'] = slow_ma_diff
            
            equity += profit_delta
            free_margin = equity - margin 
            margin_level = equity / margin * 100
            
            scaled_profit_noise = profit_noise if not in_quote_currency else profit_noise / close_price
            if abs(profit) >= scaled_profit_noise:
                trade['look_to_close'] = True
        
        # check if equity is <= 0, and if so end the sim
        if equity <= 0:
            stop = True
            print(f'strat failed (i={i}, dt={test_data_np[i][feature_indices["datetime"]]}): no more equity')
            
        # check if trades should be closed due to stop-out starting with biggest loss if so
        if margin_level is not None and margin_level <= stop_out_pct:
            sorted_keys = sorted(trades, key=lambda trade_i: trades[trade_i]['profit'])
            for j, trade_i in enumerate(sorted_keys):
                balance += trades[trade_i]['profit']

                open_tick_i = trades[trade_i]['trade_open_tick_i']
                trades[trade_i]['ticks_till_close'] = i - open_tick_i
                trades[trade_i]['close_idx'] = i
                trades[trade_i]['forced_close'] = True
                trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                backtest_trades[trade_i] = trades[trade_i]

                del trades[trade_i]
                
                if j != len(sorted_keys) - 1:
                    margin = get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, tradersway_commodity=tradersway_commodity, 
                                        in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)
                    free_margin = equity - margin
                    margin_level = equity / margin * 100                    
                    if margin_level > stop_out_pct:
                        break   
        
        # find trades to close based on CNN-LSTM preds
        closed_trades = []
        for trade_i in trades: 
            trade = trades[trade_i]
            trade_decision = xgb_labels_dict[trade['decision_label']]
        
            if trade['look_to_close']:
                if abs(fast_ma_diff) >= fast_ma_diff_thresh:
                    # (MA pct_change is decreasing on a long trade) or (MA pct_change is increasing on a short trade)
                    if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):  
                        closed_trades.append(trade_i)

        for trade_i in closed_trades:
            balance += trades[trade_i]['profit']
            
            open_tick_i = trades[trade_i]['trade_open_tick_i']
            trades[trade_i]['ticks_till_close'] = i - open_tick_i
            trades[trade_i]['close_idx'] = i
            trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
            trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
            backtest_trades[trade_i] = trades[trade_i]
            
            del trades[trade_i]
        
        if len(trades) == 0:
            margin = None
            margin_level = None

        # generate decision w/ XGB classifier and create pending order
        if len(causes) > 0 and not stop:
            start = time.time()
            model_input = pd.DataFrame([xgb_model_perc_chngs[-1]], columns=model_data.columns)
            model_input = xgb.DMatrix(model_input)
            decision_prob = xgb_decision_predictor.predict(model_input)[0]
            duration = time.time() - start # inlucde converting input in pred time
            xgb_pred_times.append(duration)
            
            decision_label = np.around(decision_prob)
            
#             if (decision_label == 1 and fast_ma_diff > 0) or (decision_label == 0 and fast_ma_diff < 0):
#                 pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)
            pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)

        cur_pct_done = int((i-buffers_rdy_idx+1) / (len(test_data_np)-buffers_rdy_idx) * 100)
        if cur_pct_done != pct_done and cur_pct_done % 10 == 0:
            pct_done = cur_pct_done
            print(f'backtest percentage done: {cur_pct_done}%')
    
    free_margins.append(free_margin)
    equities.append(equity)
    balances.append(balance)
    margins.append(margin)
    margin_levels.append(margin_level)
    open_trades_counts.append(len(trades))
    losing_trades_counts.append(losing_trades)
    
    final_dt = test_data_np[i][feature_indices["datetime"]]
    if stop:
        break

# print backtest results

backtest_runtime = time.time() - start_time
start_dt = test_data_np[buffers_rdy_idx][feature_indices['datetime']]
end_dt = final_dt

margin_levels_no_none = [ml for ml in margin_levels if ml is not None]
margins_no_none = [m for m in margins if m is not None]

num_won = 0
num_lost = 0
num_won_sells = 0
num_won_buys = 0
num_lost_sells = 0
num_lost_buys = 0
ma_diff_stat_names = ['fast_ma_diff_at_sig', 'slow_ma_diff_at_sig', 'fast_ma_diff_at_close', 'slow_ma_diff_at_close',
                      'fast_ma_diff_at_best_sign_to_close', 'slow_ma_diff_at_best_sign_to_close']
losses_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
wins_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
for trade_i in backtest_trades:
    trade = backtest_trades[trade_i]
    if trade['profit'] > 0:
        if trade['decision_label'] == 1:
            num_won_buys += 1
        else:
            num_won_sells += 1
        num_won += 1
        
        if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
            wins_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
            wins_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
        if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
            wins_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
            wins_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))
        
        wins_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
        wins_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))
        
        if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
            wins_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
        if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
            wins_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
    else:
        if trade['decision_label'] == 1:
            num_lost_buys += 1
        else:
            num_lost_sells += 1
        num_lost += 1
        
        if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
            losses_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
            losses_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
        if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
            losses_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
            losses_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))
        
        losses_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
        losses_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))
        
        if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
            losses_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
        if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
            losses_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
losses_ma_diff_stats = {name: {'arr': np.array(losses_ma_diff_stats[name]['list']), 
                               'agree_arr': np.array(losses_ma_diff_stats[name]['agree_list']), 
                               'oppose_arr': np.array(losses_ma_diff_stats[name]['oppose_list'])} for name in losses_ma_diff_stats}
wins_ma_diff_stats = {name: {'arr': np.array(wins_ma_diff_stats[name]['list']), 
                             'agree_arr': np.array(wins_ma_diff_stats[name]['agree_list']), 
                             'oppose_arr': np.array(wins_ma_diff_stats[name]['oppose_list'])} for name in wins_ma_diff_stats}

print('\n--------------------------------------------------------------------\n')
print('BACKTEST RESULTS:')
print(f'ticks data duration: {(end_dt-start_dt).days} days')
print(f'starting balance: {starting_balance}')
print(f'ending balance: {balance}')
print(f'number of trades won: {num_won}')
print(f'number of trades lost: {num_lost}')
print(f'number of buys: {num_won_buys+num_lost_buys} ({num_won_buys} won, {num_lost_buys} lost)')
print(f'number of sells: {num_won_sells+num_lost_sells} ({num_won_sells} won, {num_lost_sells} lost)')
print(f'balance range: [{min(balances)}, {max(balances)}]')
print(f'equity range: [{min(equities)}, {max(equities)}]')
print(f'free margin range: [{min(free_margins)}, {max(free_margins)}]')
print(f'margins range: [{min(margins_no_none)}, {max(margins_no_none)}]')
print(f'margin levels range: [{min(margin_levels_no_none)}, {max(margin_levels_no_none)}]')
print(f'concurrently open trades range: [{min(open_trades_counts)}, {max(open_trades_counts)}]')
print(f'concurrently losing trades range: [{min(losing_trades_counts)}, {max(losing_trades_counts)}]')
print(f'backtest runtime: {backtest_runtime/60} min')

print('\nWON TRADES RESULTS:')
for stat in wins_ma_diff_stats:
    stat_arr = wins_ma_diff_stats[stat]['arr']
    stat_agree_arr = wins_ma_diff_stats[stat]['agree_arr']
    stat_oppose_arr = wins_ma_diff_stats[stat]['oppose_arr']
    if len(stat_arr) > 0:
        print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
              f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
    if len(stat_agree_arr) > 0:
        print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
              f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
    if len(stat_oppose_arr) > 0:
        print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
              f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

print('\nLOST TRADES RESULTS:')
for stat in losses_ma_diff_stats:
    stat_arr = losses_ma_diff_stats[stat]['arr']
    stat_agree_arr = losses_ma_diff_stats[stat]['agree_arr']
    stat_oppose_arr = losses_ma_diff_stats[stat]['oppose_arr']
    if len(stat_arr) > 0:
        print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
              f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
    if len(stat_agree_arr) > 0:
        print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
              f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
    if len(stat_oppose_arr) > 0:
        print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
              f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

print('\nMODELS STATS:')
print(f'average pred time of fast & slow MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
print(f'average pred time of XGB model: {sum(xgb_pred_times)/len(xgb_pred_times)*1000} ms')

# plot strategy over time vs. price data

backtest_labels_col_names = ['decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred']
backtest_labels = []
for i in range(len(test_data_np)):
    if i in backtest_trades:
        trade = backtest_trades[i]
        trade_decision = xgb_labels_dict[trade['decision_label']]
        backtest_labels.append([trade_decision, trade['ticks_till_close'], trade['profit'], trade['close_idx']])
    else:
        backtest_labels.append([None]*len(backtest_labels_col_names))
backtest_labels = pd.DataFrame(backtest_labels, columns=backtest_labels_col_names)
backtest_labels = pd.concat((test_data_labels, backtest_labels.reset_index(drop=True)), axis=1)    

fill = [None] * buffers_rdy_idx
fill.extend(fast_ma_preds)
fast_ma_preds = fill 
fast_ma_preds.extend([None]*(len(test_data_np) - len(fast_ma_preds)))

fill = [None] * buffers_rdy_idx
fill.extend(slow_ma_preds)
slow_ma_preds = fill 
slow_ma_preds.extend([None]*(len(test_data_np) - len(slow_ma_preds)))

lstm_preds = pd.DataFrame({
    'fast_ma': fast_ma_preds, 
    'slow_ma': slow_ma_preds
})

balances.extend([None]*(len(test_data_np) - len(balances)))
equities.extend([None]*(len(test_data_np) - len(equities)))
free_margins.extend([None]*(len(test_data_np) - len(free_margins)))
open_trades_counts.extend([None]*(len(test_data_np) - len(open_trades_counts)))
losing_trades_counts.extend([None]*(len(test_data_np) - len(losing_trades_counts)))

strat_data_df = pd.DataFrame({
    'balance': balances,
    'equity': equities,
    'free margin': free_margins
})

open_trades_counts_df = pd.DataFrame({
    'open trades': open_trades_counts,
    'losing trades': losing_trades_counts
})

labels = [#'first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
          'decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred']
show_data_from_range(test_data_with_ichi_sigs, start_dt.isoformat(), end_dt.isoformat(), main_indicator='ichimoku', 
                     sub_indicators=[lstm_preds, strat_data_df, open_trades_counts_df], visualize_crosses=True, visualize_labels=True, 
                     labels_df=backtest_labels, labels=labels)

model buffers full, beginning trade sim...
backtest percentage done: 10%
backtest percentage done: 20%
backtest percentage done: 30%
backtest percentage done: 40%
backtest percentage done: 50%
backtest percentage done: 60%
backtest percentage done: 70%
backtest percentage done: 80%
backtest percentage done: 90%
backtest percentage done: 100%

--------------------------------------------------------------------

BACKTEST RESULTS:
ticks data duration: 47 days
starting balance: 1000
ending balance: 2820.9999999999986
number of trades won: 53
number of trades lost: 37
number of buys: 83 (49 won, 34 lost)
number of sells: 7 (4 won, 3 lost)
balance range: [720.0000000000088, 3076.4000000000106]
equity range: [720.0000000000092, 3102.000000000007]
free margin range: [625.5754200000116, 3051.43120000001]
margins range: [24.2822, 242.32760000000002]
margin levels range: [476.79686947559895, 12196.312876210966]
concurrently open trades range: [0, 5]
concurrently losing trades range: [0, 3]
backt

In [None]:
# # for debugging

# fast_ma_perc_chngs = pd.DataFrame(fast_ma_perc_chngs,columns=model_data.columns)
# print(fast_ma_perc_chngs.shape)

# slow_ma_perc_chngs = pd.DataFrame(slow_ma_perc_chngs,columns=model_data.columns)
# print(slow_ma_perc_chngs.shape)  

# x = apply_moving_avg(model_data, ma_cols, fast_ma_window)
# x.dropna(how='any', axis=0, inplace=True)
# x = apply_perc_change(x, pc_cols)
# x.dropna(how='any', axis=0, inplace=True)
# x = normalize_data(x, train_data=False, normalization_terms=fast_ma_norm_terms)[0]
# x_vals = x.to_numpy().astype(np.float32)

# print(x.shape)
# res = np.isclose(x_vals, fast_ma_perc_chngs.to_numpy().astype(np.float32))
# print(res)
# print(np.all(res))

# print()

# x = apply_moving_avg(model_data, ma_cols, slow_ma_window)
# x.dropna(how='any', axis=0, inplace=True)
# x = apply_perc_change(x, pc_cols)
# x.dropna(how='any', axis=0, inplace=True)
# x = normalize_data(x, train_data=False, normalization_terms=slow_ma_norm_terms)[0]
# x_vals = x.to_numpy().astype(np.float32)

# print(x.shape)
# res = np.isclose(x_vals, slow_ma_perc_chngs.to_numpy().astype(np.float32))
# print(res)
# print(np.all(res))

#### tune strat hyperparams with grid search

In [None]:
param_grid = {
    'ma_models_settings': [
        {
            'fast_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_5-ma_8-22-44-ichi.hdf5',
            'slow_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_13-ma_8-22-44-ichi.hdf5',
            'fast_ma_window': 5,
            'slow_ma_window': 13
        },
        {
            'fast_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_3-ma_8-22-44-ichi.hdf5',
            'slow_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_5-ma_8-22-44-ichi.hdf5',
            'fast_ma_window': 3,
            'slow_ma_window': 13
        },
        {
            'fast_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_8-22-44-ichi.hdf5',
            'slow_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_13-ma_8-22-44-ichi.hdf5',
            'fast_ma_window': 7,
            'slow_ma_window': 13
        },
    ],
    'xgb_model_settings': [
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.003-min_profit_0.2-lots_left-cur_side_8-22-24-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'profit_noise_percent': 0.003,
            'ichi_settings': (8, 22, 24),
            'currency_side': 'left'
        },
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.003-min_profit_0.2-lots_left-cur_side_8-22-44-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'profit_noise_percent': 0.003,
            'ichi_settings': (8, 22, 44),
            'currency_side': 'left'
        },
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.004-min_profit_0.2-lots_right-cur_side_8-22-24-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'profit_noise_percent': 0.004,
            'ichi_settings': (8, 22, 24),
            'currency_side': 'right'
        },
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.004-min_profit_0.2-lots_right-cur_side_8-22-44-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'profit_noise_percent': 0.004,
            'ichi_settings': (8, 22, 44),
            'currency_side': 'right'
        }
    ],
    'strat_params': [{
        'starting_balance': [1000],
        'leverage': [500],
        'max_concurrent_trades': [np.inf, 10, 5],
        'ma_diff_threshold_pairs': [(0.02, 0.02), (0.06, 0.06), (0.03, 0.03), (0.04, 0.04), (0.05, 0.05)],   # (fast ma's, slow ma's)
        'decision_prob_diff_thresh': [0.5, 0.3, 0.4, 0.2]
    }]
}

lstm_seq_len = 128
xgb_labels_dict = {1: 'buy', 0: 'sell'}
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
pip_resolution = 0.0001
stop_out_pct = 0.2  # explaination: https://www.tradersway.com/new_to_the_market/forex_and_cfd_basics#margin
label_non_signals=False
cur_pair = 'EURUSD'
timeframe = 'H1'
hedged_margin = 50_000
tradersway_commodity = False

open_trade_sigs = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                   'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                   'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                   'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                   'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
                        ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
                        ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength


param_grid = ParameterGrid(param_grid)
param_grid = random.sample(list(param_grid), len(param_grid))

In [None]:
start_dt_str = '2020-11-02'
end_dt_str = '2021-01-05'
tick_data_filepath = gi.download_mt5_data(cur_pair, timeframe, start_dt_str, end_dt_str) # (cur_pair, timeframe, '2020-10-02', '2021-01-05')
best_strat_results = None
best_strat_score = None
backtest_results = []

grid_search_start_time = time.time()
for params_i, params in enumerate(param_grid):
    s1 = time.time()
    ma_models_settings = params['ma_models_settings']
    xgb_model_settings = params['xgb_model_settings']
    
    strat_params = params['strat_params']
    strat_params = ParameterGrid(strat_params)
    strat_params = random.sample(list(strat_params), len(strat_params))
    
    fast_ma_model_path = ma_models_settings['fast_ma_model_path']
    slow_ma_model_path = ma_models_settings['slow_ma_model_path']
    fast_ma_window = ma_models_settings['fast_ma_window']
    slow_ma_window = ma_models_settings['slow_ma_window']
    
    fast_ma_model = tf.keras.models.load_model(fast_ma_model_path)
    slow_ma_model = tf.keras.models.load_model(slow_ma_model_path)
    
    xgb_model_path = xgb_model_settings['model_filepath']
    lots_per_trade = xgb_model_settings['lots_per_trade']
    profit_noise_percent = xgb_model_settings['profit_noise_percent']
    tenkan_period, kijun_period, senkou_b_period = xgb_model_settings['ichi_settings']
    currency_side = xgb_model_settings['currency_side']
    in_quote_currency = True if currency_side == 'right' else False
    
    xgb_decision_predictor = xgb.Booster()
    xgb_decision_predictor.load_model(xgb_model_path)
    
    model_config = {
        'current_model':'ichi_cloud',
        'ichi_cloud':{
            'indicators': {
                'ichimoku': {
                    'tenkan_period': tenkan_period,
                    'kijun_period': kijun_period,
                    'chikou_period': kijun_period,
                    'senkou_b_period': senkou_b_period
                },
                'rsi': {
                    'periods': 14
                }
            }
        }
    }
    
    data_with_indicators = gi.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                    save_to_disk=False, 
                                                    config=model_config, 
                                                    has_headers=True,
                                                    datetime_col='datetime')
    test_data_with_ichi_sigs = add_features(data_with_indicators)
    model_data = dummy_and_remove_features(test_data_with_ichi_sigs)
    
    start, stop = no_missing_data_idx_range(test_data_with_ichi_sigs, early_ending_cols=['chikou_span_visual'])
    test_data_with_ichi_sigs = test_data_with_ichi_sigs.iloc[start:stop+1]
    test_data_np = test_data_with_ichi_sigs.to_numpy()

    start, stop = no_missing_data_idx_range(model_data, early_ending_cols=['chikou_span_visual'])
    model_data = model_data.iloc[start:stop+1]
    model_data_np = model_data.to_numpy()

    ma_cols_set = set([model_data.columns.get_loc(col_name) for col_name in ma_cols])
    pc_cols_set = set([model_data.columns.get_loc(col_name) for col_name in pc_cols])

    feature_indices = {test_data_with_ichi_sigs.columns[i]: i for i in range(len(test_data_with_ichi_sigs.columns))}
    
    for params_i_2, params_2 in enumerate(strat_params):
        s2 = time.time()
        starting_balance = params_2['starting_balance']
        leverage = params_2['leverage']    # 1:leverage
        max_concurrent_trades = params_2['max_concurrent_trades']
        fast_ma_diff_thresh, slow_ma_diff_thresh = params_2['ma_diff_threshold_pairs']
        decision_prob_diff_thresh = params_2['decision_prob_diff_thresh']   # 0.5 accepts all probabilities
        
        pip_value = contract_size * lots_per_trade * pip_resolution   # in quote currency (right side currency of currency pair)
        profit_noise = profit_noise_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on
        
        fast_ma_data = get_split_data_ma(model_data, ma_window=fast_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
                                          normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
                                          max_batch_size=2000, just_train=True, print_info=False)
        slow_ma_data = get_split_data_ma(model_data, ma_window=slow_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
                                          normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
                                          max_batch_size=2000, just_train=True, print_info=False)
        
        fast_ma_norm_terms = fast_ma_data['all_train_normalization_terms']
        slow_ma_norm_terms = slow_ma_data['all_train_normalization_terms']
        
        trades = {}
        backtest_trades = {}   # closed trades results
        pending_order = None
        pending_close = None
        decisions_so_far = []
        fast_ma_seq_buf = deque()
        slow_ma_seq_buf = deque()
        fast_ma_window_buf = deque()
        slow_ma_window_buf = deque()
        fast_ma_avgs = []
        slow_ma_avgs = []
        fast_ma_perc_chngs = []
        slow_ma_perc_chngs = []
        fast_ma_preds = []
        slow_ma_preds = []
        cnn_lstm_pred_times = []
        xgb_pred_times = []
        free_margins = []
        margins = []
        margin_levels = []
        equities = []
        balances = []
        open_trades_counts = []
        losing_trades_counts = []
        pct_done = 0
        buffers_rdy_idx = None
        balance = starting_balance
        equity = starting_balance
        free_margin = starting_balance
        losing_trades = 0
        margin_level = None
        margin = None
        final_dt = None
        stop = False

        start_time = time.time()
        for i in range(len(test_data_np)):
            """
            fill data buffers for models
            """

            # for fast MA model

            fast_ma_window_buf.append(model_data_np[i])
            if len(fast_ma_window_buf) > fast_ma_window:
                fast_ma_window_buf.popleft()

            if len(fast_ma_window_buf) == fast_ma_window:
                row = apply_moving_avg_q(fast_ma_window_buf, ma_cols_set)
                fast_ma_avgs.append(row)

            if len(fast_ma_avgs) >= 2:
                row = apply_perc_change_list(fast_ma_avgs[-2], fast_ma_avgs[-1], pc_cols_set)
                row = normalize_data_list(row, fast_ma_norm_terms)
                fast_ma_perc_chngs.append(row) 

            if len(fast_ma_perc_chngs) > 0:
                fast_ma_seq_buf.append(fast_ma_perc_chngs[-1])

            if len(fast_ma_seq_buf) > lstm_seq_len:
                fast_ma_seq_buf.popleft()

            # for slow MA model

            slow_ma_window_buf.append(model_data_np[i])
            if len(slow_ma_window_buf) > slow_ma_window:
                slow_ma_window_buf.popleft()

            if len(slow_ma_window_buf) == slow_ma_window:
                row = apply_moving_avg_q(slow_ma_window_buf, ma_cols_set)
                slow_ma_avgs.append(row)

            if len(slow_ma_avgs) >= 2:
                row = apply_perc_change_list(slow_ma_avgs[-2], slow_ma_avgs[-1], pc_cols_set)
                row = normalize_data_list(row, slow_ma_norm_terms)
                slow_ma_perc_chngs.append(row)  

            if len(slow_ma_perc_chngs) > 0:
                slow_ma_seq_buf.append(slow_ma_perc_chngs[-1])

            if len(slow_ma_seq_buf) > lstm_seq_len:
                slow_ma_seq_buf.popleft()

            # check if LSTMs have enough data to being trade simulation
            if len(fast_ma_seq_buf) == lstm_seq_len and len(slow_ma_seq_buf) == lstm_seq_len:
                """
                simulate trading
                """

                if buffers_rdy_idx is None:
                    buffers_rdy_idx = i
                    print('model buffers full, beginning trade sim...')

                # look for ichiomku signals
                causes = []
                for sig in open_trade_sigs:
                    sig_i = feature_indices[sig]
                    if test_data_np[i][sig_i] != 0:
                        causes.append(sig)

                start = time.time()
                fast_ma_pred = fast_ma_model.predict(np.array([fast_ma_seq_buf]))
                slow_ma_pred = slow_ma_model.predict(np.array([slow_ma_seq_buf]))
                duration = time.time() - start
                cnn_lstm_pred_times.append(duration)

                fast_ma_preds.append(fast_ma_pred[0][0])
                slow_ma_preds.append(slow_ma_pred[0][0])

                if len(fast_ma_preds) > 1:
                    fast_ma_diff = fast_ma_preds[-1] - fast_ma_preds[-2]    # remember this is the diff in the pct_change of the mov avg
                    slow_ma_diff = slow_ma_preds[-1] - slow_ma_preds[-2]
                else:
                    fast_ma_diff = 0
                    slow_ma_diff = 0

                if pending_order is not None:
                    pending_order_i, decision_label, decision_prob, order_causes, sig_fast_ma_diff, sig_slow_ma_diff  = pending_order
                    open_price = test_data_np[i][feature_indices['Open']]
                    decision_prob_diff = abs(decision_label-decision_prob)

                    trades[pending_order_i] = {
                        'decision_label': decision_label,
                        'decision_prob': decision_prob,
                        'causes': order_causes,
                        'open_price': open_price,
                        'trade_open_tick_i': i,
                        'profit': None,
                        'best_profit': None,
                        'ticks_till_close': None,
                        'close_idx': None,
                        'lots': lots_per_trade,
                        'look_to_close': False,
                        'forced_close': False,
                        'fast_ma_diff_at_sig': sig_fast_ma_diff,
                        'slow_ma_diff_at_sig': sig_slow_ma_diff,
                        'fast_ma_diff_at_close': None,
                        'slow_ma_diff_at_close': None,
                        'fast_ma_diff_at_best_sign_to_close': None,
                        'slow_ma_diff_at_best_sign_to_close': None
                    }

                    required_margin = get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                                 tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)

                    # reference on opening trades and margin level https://www.luckscout.com/leverage-margin-balance-equity-free-margin-and-margin-level-in-forex-trading/
                    if required_margin > free_margin or (margin_level is not None and margin_level <= 100) \
                            or len(trades) > max_concurrent_trades or decision_prob_diff > decision_prob_diff_thresh:
                        del trades[pending_order_i]
                    else:
                        margin = required_margin

                    pending_order = None

                # update equity and free margin based on currently opened trades
                losing_trades = 0
                for trade_i in trades:
                    trade = trades[trade_i]
                    close_price = test_data_np[i][feature_indices['Close']]
                    trade_decision = xgb_labels_dict[trade['decision_label']]

                    profit = get_profit(close_price, trade['open_price'], pip_value=pip_value, pip_resolution=pip_resolution, in_quote_currency=in_quote_currency)
                    if trade_decision == 'sell':
                        profit *= - 1

                    if trade['profit'] is None:
                        profit_delta = profit
                    else:
                        profit_delta = profit - trade['profit']
                    trade['profit'] = profit

                    if profit < 0:
                        losing_trades += 1

                    if trade['best_profit'] is None or profit > trade['best_profit']:
                        trade['best_profit'] = profit
                        if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):
                            trade['fast_ma_diff_at_best_sign_to_close'] = fast_ma_diff
                        if (slow_ma_diff < 0 and trade_decision == 'buy') or (slow_ma_diff > 0 and trade_decision == 'sell'):
                            trade['slow_ma_diff_at_best_sign_to_close'] = slow_ma_diff

                    equity += profit_delta
                    free_margin = equity - margin 
                    margin_level = equity / margin * 100

                    scaled_profit_noise = profit_noise if not in_quote_currency else profit_noise / close_price
                    if abs(profit) >= scaled_profit_noise:
                        trade['look_to_close'] = True

                # check if equity is <= 0, and if so end the sim
                if equity <= 0:
                    stop = True
                    print(f'strat failed (i={i}, dt={test_data_np[i][feature_indices["datetime"]]}): no more equity')

                # check if trades should be closed due to stop-out starting with biggest loss if so
                if margin_level is not None and margin_level <= stop_out_pct:
                    sorted_keys = sorted(trades, key=lambda trade_i: trades[trade_i]['profit'])
                    for j, trade_i in enumerate(sorted_keys):
                        balance += trades[trade_i]['profit']

                        open_tick_i = trades[trade_i]['trade_open_tick_i']
                        trades[trade_i]['ticks_till_close'] = i - open_tick_i
                        trades[trade_i]['close_idx'] = i
                        trades[trade_i]['forced_close'] = True
                        trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                        trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                        backtest_trades[trade_i] = trades[trade_i]

                        del trades[trade_i]

                        if j != len(sorted_keys) - 1:
                            margin = get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, tradersway_commodity=tradersway_commodity, 
                                                in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)
                            free_margin = equity - margin
                            margin_level = equity / margin * 100                    
                            if margin_level > stop_out_pct:
                                break   

                # find trades to close based on CNN-LSTM preds
                closed_trades = []
                for trade_i in trades: 
                    trade = trades[trade_i]
                    trade_decision = xgb_labels_dict[trade['decision_label']]

                    if trade['look_to_close']:
                        if abs(fast_ma_diff) >= fast_ma_diff_thresh:
                            # (MA pct_change is decreasing on a long trade) or (MA pct_change is increasing on a short trade)
                            if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):  
                                closed_trades.append(trade_i)

                for trade_i in closed_trades:
                    balance += trades[trade_i]['profit']

                    open_tick_i = trades[trade_i]['trade_open_tick_i']
                    trades[trade_i]['ticks_till_close'] = i - open_tick_i
                    trades[trade_i]['close_idx'] = i
                    trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                    trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                    backtest_trades[trade_i] = trades[trade_i]

                    del trades[trade_i]

                if len(trades) == 0:
                    margin = None
                    margin_level = None

                # generate decision w/ XGB classifier and create pending order
                if len(causes) > 0 and not stop:
                    start = time.time()
                    model_input = pd.DataFrame([model_data_np[i]], columns=model_data.columns)
                    model_input = xgb.DMatrix(model_input)
                    decision_prob = xgb_decision_predictor.predict(model_input)[0]
                    duration = time.time() - start # inlucde converting input in pred time
                    xgb_pred_times.append(duration)

                    decision_label = np.around(decision_prob)

#                     if (decision_label == 1 and fast_ma_diff > 0) or (decision_label == 0 and fast_ma_diff < 0):
#                         pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)
                    pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)

                cur_pct_done = int((i-buffers_rdy_idx+1) / (len(test_data_np)-buffers_rdy_idx) * 100)
                if cur_pct_done != pct_done and cur_pct_done % 10 == 0:
                    pct_done = cur_pct_done
                    print(f'backtest percentage done: {cur_pct_done}%')

            free_margins.append(free_margin)
            equities.append(equity)
            balances.append(balance)
            margins.append(margin)
            margin_levels.append(margin_level)
            open_trades_counts.append(len(trades))
            losing_trades_counts.append(losing_trades)

            final_dt = test_data_np[i][feature_indices["datetime"]]
            if stop:
                break

        # print backtest results

        backtest_runtime = time.time() - start_time
        start_dt = test_data_np[buffers_rdy_idx][feature_indices['datetime']]
        end_dt = final_dt

        margin_levels_no_none = [ml for ml in margin_levels if ml is not None]
        margins_no_none = [m for m in margins if m is not None]

#         ma_diff_stat_names = ['fast_ma_diff_at_sig', 'slow_ma_diff_at_sig', 'fast_ma_diff_at_close', 'slow_ma_diff_at_close',
#                               'fast_ma_diff_at_best_sign_to_close', 'slow_ma_diff_at_best_sign_to_close']
#         losses_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
#         wins_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
#         for trade_i in backtest_trades:
#             trade = backtest_trades[trade_i]
#             if trade['profit'] > 0:
#                 if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
#                     wins_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
#                 elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
#                     wins_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
#                 if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
#                     wins_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
#                 elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
#                     wins_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

#                 wins_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
#                 wins_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

#                 if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
#                     wins_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
#                 if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
#                     wins_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
#             else:
#                 if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
#                     losses_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
#                 elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
#                     losses_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
#                 if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
#                     losses_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
#                 elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
#                     losses_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

#                 losses_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
#                 losses_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

#                 if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
#                     losses_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
#                 if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
#                     losses_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
#         losses_ma_diff_stats = {name: {'arr': np.array(losses_ma_diff_stats[name]['list']), 
#                                        'agree_arr': np.array(losses_ma_diff_stats[name]['agree_list']), 
#                                        'oppose_arr': np.array(losses_ma_diff_stats[name]['oppose_list'])} for name in losses_ma_diff_stats}
#         wins_ma_diff_stats = {name: {'arr': np.array(wins_ma_diff_stats[name]['list']), 
#                                      'agree_arr': np.array(wins_ma_diff_stats[name]['agree_list']), 
#                                      'oppose_arr': np.array(wins_ma_diff_stats[name]['oppose_list'])} for name in wins_ma_diff_stats}

        print('\n--------------------------------------------------------------------\n')
        print('BACKTEST RESULTS:')
        print(f'ticks data duration: {(end_dt-start_dt).days} days')
        print(f'starting balance: {starting_balance}')
        print(f'ending balance: {balance}')
        print(f'balance range: [{min(balances)}, {max(balances)}]')
        print(f'equity range: [{min(equities)}, {max(equities)}]')
        print(f'free margin range: [{min(free_margins)}, {max(free_margins)}]')
        print(f'margins range: [{min(margins_no_none)}, {max(margins_no_none)}]')
        print(f'margin levels range: [{min(margin_levels_no_none)}, {max(margin_levels_no_none)}]')
        print(f'concurrently open trades range: [{min(open_trades_counts)}, {max(open_trades_counts)}]')
        print(f'concurrently losing trades range: [{min(losing_trades_counts)}, {max(losing_trades_counts)}]')
        print(f'backtest runtime: {backtest_runtime/60} min')

#         print('\nWON TRADES RESULTS:')
#         for stat in wins_ma_diff_stats:
#             stat_arr = wins_ma_diff_stats[stat]['arr']
#             stat_agree_arr = wins_ma_diff_stats[stat]['agree_arr']
#             stat_oppose_arr = wins_ma_diff_stats[stat]['oppose_arr']
#             if len(stat_arr) > 0:
#                 print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
#                       f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
#             if len(stat_agree_arr) > 0:
#                 print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
#                       f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
#             if len(stat_oppose_arr) > 0:
#                 print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
#                       f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

#         print('\nLOST TRADES RESULTS:')
#         for stat in losses_ma_diff_stats:
#             stat_arr = losses_ma_diff_stats[stat]['arr']
#             stat_agree_arr = losses_ma_diff_stats[stat]['agree_arr']
#             stat_oppose_arr = losses_ma_diff_stats[stat]['oppose_arr']
#             if len(stat_arr) > 0:
#                 print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
#                       f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
#             if len(stat_agree_arr) > 0:
#                 print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
#                       f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
#             if len(stat_oppose_arr) > 0:
#                 print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
#                       f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

        print('\nMODELS STATS:')
        print(f'average pred time of fast & slow MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
        print(f'average pred time of XGB model: {sum(xgb_pred_times)/len(xgb_pred_times)*1000} ms')
        
        results = {
            'tenkan_period': tenkan_period,
            'kijun_period': kijun_period,
            'chikou_period': kijun_period,
            'senkou_b_period': senkou_b_period,
            'fast_ma_model_path': fast_ma_model_path,
            'slow_ma_model_path': slow_ma_model_path,
            'fast_ma_window': fast_ma_window,
            'slow_ma_window': slow_ma_window,
            'xgb_model_path': xgb_model_path,
            'lots_per_trade': lots_per_trade,
            'profit_noise_percent': profit_noise_percent,
            'stop_out_pct': stop_out_pct,
            'starting_balance': starting_balance,
            'leverage': leverage,
            'max_concurrent_trades': max_concurrent_trades,
            'currency_side': currency_side,
            'fast_ma_diff_thresh': fast_ma_diff_thresh,
            'slow_ma_diff_thresh': slow_ma_diff_thresh,
            'decision_prob_diff_thresh': decision_prob_diff_thresh,
            'ending_balance': balance,
            'max_balance': max(balances),
            'min_balance': min(balances),
            'max_equity': max(equities),
            'min_equity': min(equities),
            'max_free_margin': max(free_margins),
            'min_free_margin': min(free_margins),
            'max_margin': max(margins_no_none),
            'min_margin': min(margins_no_none),
            'max_margin_level': max(margin_levels_no_none),
            'min_margin_level': min(margin_levels_no_none),
            'max_concurrently_open_trades': max(open_trades_counts),
            'min_concurrently_open_trades': min(open_trades_counts),
        }
        
        strat_score = balance
        if best_strat_results is None or best_strat_score < strat_score:
            best_strat_results = results
            best_strat_score = strat_score
            
        backtest_results.append(results)
        
        print('\n--------------------------------------------------------------------------------')
        print(f'{params_i_2+1}/{len(strat_params)} strat params tested, runtime of last params: {(time.time()-s2)/60} min')
        print('--------------------------------------------------------------------------------\n')
        print(f'last backtest results:')
        print(f'{results}\n')
        print(f'best backtest results:')
        print(f'{best_strat_results}\n')
        
    print('\n--------------------------------------------------------------------------------')
    print(f'{params_i+1}/{len(param_grid)} model combos tested, runtime of last combo: {(time.time()-s1)/60} min')
    print('--------------------------------------------------------------------------------\n')

    backtest_results_sorted = sorted(backtest_results, key=lambda d: d['ending_balance'], reverse=True)
    backtest_results_sorted_df = pd.DataFrame(backtest_results_sorted)
    backtest_results_sorted_df.to_csv(f'../my_stuff/{cur_pair}-{timeframe}_{start_dt_str}-to-{end_dt_str}_backtest_grid_search_results.csv')

print(f'grid search runtime: {(time.time()-grid_search_start_time)/60} min')

backtest_results_sorted = sorted(backtest_results, key=lambda d: d['ending_balance'], reverse=True)
backtest_results_sorted_df = pd.DataFrame(backtest_results_sorted)
backtest_results_sorted_df.to_csv(f'../my_stuff/{cur_pair}-{timeframe}_{start_dt_str}-to-{end_dt_str}_backtest_grid_search_results.csv')

# notes on things to do

In [None]:
"""
if self.data[i][self.feature_indices['datetime']].strftime('%Y-%m-%dT%H:%M') == '2013-05-28T10:00':
    print('yo')
"""

"""
To-do:

1) tune hyperparams for backtest of xgboost for opening and CNN+Bi-LSTM for closing strat
"""