In [1]:
# external packages
from pathlib import Path
import matplotlib.pyplot as plt 
from importlib import reload
import matplotlib
%matplotlib qt
# %matplotlib inline
import numpy as np
matplotlib.style.use('default')
from datetime import datetime
from datetime import timedelta
import pandas as pd
from collections import namedtuple
from collections import deque
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import ParameterGrid
import random
import time

In [2]:
# local modules and packages
from ForexMachine.Preprocessing import research
from ForexMachine import util
reload(research)
reload(util)

# global variables
global_train_data_range_start, global_train_data_range_end = research.TRAIN_DATA_START_ISO, research.TRAIN_DATA_END_ISO

# trying loading and adding indicators to raw data w/ ForexMachine package

In [420]:
indicators_info = {
    'ichimoku': {
        'tenkan_period': 9,
        'kijun_period': 26,
        'chikou_period': 26,
        'senkou_b_period': 24
    },
    'rsi': {
        'periods': 14
    }
}
data_with_indicators = research.add_indicators_to_raw(filepath='../my_stuff/USDZARi60.csv', has_headers=False, 
                                                      headers=['Date','Time','Open', 'High', 'Low', 'Close', 'Volume'],
                                                      indicators_info=indicators_info)
data_with_indicators.head()

Unnamed: 0,Date,Time,Open,High,Low,Close,Volume,datetime,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_visual_ichimoku_a,trend_visual_ichimoku_b,chikou_span,chikou_span_visual,momentum_rsi
0,2018.11.09,02:00,14.11521,14.12511,14.10894,14.12415,8384,2018-11-09 02:00:00,,,,14.117025,15.373863,15.372442,,14.30154,
1,2018.11.09,03:00,14.1242,14.12751,14.11264,14.12071,5841,2018-11-09 03:00:00,,,,14.118225,15.373863,15.372442,,14.30169,
2,2018.11.09,04:00,14.12069,14.13135,14.11893,14.12025,2804,2018-11-09 04:00:00,,,,14.120145,15.373863,15.372442,,14.32429,
3,2018.11.09,05:00,14.12025,14.15063,14.11565,14.15018,3769,2018-11-09 05:00:00,,,,14.129785,15.373863,15.372442,,14.41788,
4,2018.11.09,06:00,14.1502,14.18167,14.14717,14.18014,2260,2018-11-09 06:00:00,,,,14.145305,15.373863,15.372442,,14.40633,


# define helper plotting functions

In [3]:
def get_index_range(datetime1, datetime2, datetimes):
    i1 = -1
    i2 = -1
    if datetime1 <= datetime2:
        for i in range(len(datetimes)):
            i1 = i
            if datetimes[i] == datetime1:
                break
            if datetimes[i] > datetime1:
                i1 = i-1 if i-1 >= 0 else 0
                break
        for i in range(i1, len(datetimes)):
            i2 = i
            if datetimes[i] == datetime2:
                break
            if datetimes[i] > datetime2:
                i2 = i-1 if i-1 >= 0 else 0
                break
    return i1, i2

# date format 'yyyy.mm.dd'
def show_data_from_range(df, date1, date2, main_indicator, sub_indicators = [], visualize_crosses=False, crosses=None,
                         visualize_labels=False, labels_df=None, labels=None):
    
    start, stop = get_index_range(pd.Timestamp.fromisoformat(date1), pd.Timestamp.fromisoformat(date2), df['datetime'].to_numpy())
    if start < 0 or stop < 0:
        print(f'invalid dates (start i = {start}, stop i = {stop})')
        return
    
    data_range = df.iloc[start:stop+1]
    chart_count = len(sub_indicators) + 1
    
    top_chart_ratio = 1
    sub_chart_ratio = 0
    if chart_count == 2:
        top_chart_ratio = 3
        sub_chart_ratio = 2 / (chart_count-1)
    if chart_count > 2:
        top_chart_ratio = 1
        sub_chart_ratio = 1 / (chart_count-1)
    height_ratios = [top_chart_ratio]
    height_ratios.extend([sub_chart_ratio]*(chart_count-1))
    fig, axes = plt.subplots(chart_count,1,sharex='col', gridspec_kw={'height_ratios':height_ratios})
    fig.tight_layout(pad=1.8, h_pad=0.0)
    
    top_ax = None
    bottom_ax = None
    if chart_count > 1:
        top_ax = axes[0]
        bottom_ax = axes[len(axes)-1]
    else:
        bottom_ax = top_ax = axes
    top_ax.plot(data_range.Close.to_list(), label='Close',color='brown')
    
    plot_indicator_funcs = {
        'ichimoku': lambda ax, dataf: add_ichimoku_to_plot(ax, dataf, visualize_crosses, crosses),
        'rsi': lambda ax, dataf: add_rsi_to_plot(ax, dataf),
        'extra': lambda ax, extra_df, plot_range: add_extra_data_to_plot(ax, extra_df, plot_range)
    }
    
    plot_indicator_funcs[main_indicator](top_ax, data_range)
    
    for i in range(len(sub_indicators)):
        item = sub_indicators[i]
        if isinstance(item, str):
            plot_indicator_funcs[sub_indicators[i]](axes[i+1], data_range)
        elif isinstance(item, pd.DataFrame):
            plot_indicator_funcs['extra'](axes[i+1], item, (start, stop))
        
    if visualize_labels and labels_df is not None:
        add_labels_to_plot(top_ax, df, labels_df, (start, stop), labels)

    bottom_ax.set_xticks(np.arange(len(data_range)))
    x_labels = [dt.strftime('%Y-%m-%d %H:%M') * ((i+1)%2) for i,dt in enumerate(data_range['datetime'])]
    bottom_ax.set_xticklabels(x_labels,rotation=80, wrap=True)
    
    if chart_count > 1:
        for ax in axes:
            ax.legend()
    else:
        top_ax.legend()
        
    plt.show()

    
"""
Functions for adding indicators to a matplotlib chart
"""

def add_ichimoku_to_plot(ax, df, visualize_crosses = False, crosses=None):
    ax.plot(df.trend_visual_ichimoku_a.to_list(), label='Senkou-Span a',linestyle='--',color='green')
    ax.plot(df.trend_visual_ichimoku_b.to_list(), label='Senkou-Span b',linestyle='--',color='red')
    ax.fill_between(np.arange(len(df)),df.trend_visual_ichimoku_a,
                    df.trend_visual_ichimoku_b,alpha=0.2,color='green',
                    where=(df.trend_visual_ichimoku_a > df.trend_visual_ichimoku_b))
    ax.fill_between(np.arange(len(df)),df.trend_visual_ichimoku_a,
                    df.trend_visual_ichimoku_b,alpha=0.2,color='red',
                    where=(df.trend_visual_ichimoku_a <= df.trend_visual_ichimoku_b))
    ax.plot(df.trend_ichimoku_conv.to_list(), label='Tenkan-Sen (conversion)',color='cyan')
    ax.plot(df.trend_ichimoku_base.to_list(), label='Kijun Sen (base)',color='blue')
    ax.plot(df.chikou_span_visual.to_list(), label='chikou span',linestyle=':',color='orange')
    
    if visualize_crosses:
        colors = {
            'tk_cross': 'hotpink',
            'tk_price_cross': 'brown',
            'senkou_cross': 'blue',
            'chikou_cross': 'orange',
            'kumo_breakout': 'purple'
        }
        
        df_idx = {df.columns[i]: i for i in range(len(df.columns))}
        data = df.to_numpy()
        
        if crosses is None:
            crosses = set(['tk_cross', 'tk_price_cross', 'senkou_cross', 'chikou_cross', 'kumo_breakout'])
        else:
            crosses = set(crosses)
                
        for i in range(len(data)):   
            close = data[i][df_idx['Close']]            
            vert_occupied = False
            filler = ''
            
            # tk cross
            if 'tk_cross' in crosses:
                tk_cross_bull_strength = abs(data[i][df_idx['tk_cross_bull_strength']])
                tk_cross_bear_strength = abs(data[i][df_idx['tk_cross_bear_strength']])
                tk_cross_length_bull = data[i][df_idx['tk_cross_bull_length']]
                tk_cross_length_bear = data[i][df_idx['tk_cross_bear_length']]
            
                if not np.isnan(tk_cross_bull_strength) and tk_cross_bull_strength > 0:
                    ax.axvline(x = i, color = colors['tk_cross'])
                    ax.text(x = i, y = close, color = colors['tk_cross'],
                            s = f'^ TK Cross Bull\nstrength={tk_cross_bull_strength}\nlength={tk_cross_length_bull}')
                    vert_occupied = True

                if not np.isnan(tk_cross_bear_strength) and tk_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_cross'])
                    ax.text(x = i, y = close, color = colors['tk_cross'],
                            s = f'_ TK Cross Bear\nstrength={tk_cross_bear_strength}'
                                f'\nlength={tk_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # tk price cross
            if 'tk_price_cross' in crosses:
                tk_price_cross_bull_strength = abs(data[i][df_idx['tk_price_cross_bull_strength']])
                tk_price_cross_bear_strength = abs(data[i][df_idx['tk_price_cross_bear_strength']])
                tk_price_cross_length_bull = data[i][df_idx['tk_price_cross_bull_length']]
                tk_price_cross_length_bear = data[i][df_idx['tk_price_cross_bear_length']]
                
                if not np.isnan(tk_price_cross_bull_strength) and tk_price_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_price_cross'])
                    ax.text(x = i, y = close, color = colors['tk_price_cross'],
                            s = f'^ TK Price Cross Bull\nstrength={tk_price_cross_bull_strength}'
                                f'\nlength={tk_price_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(tk_price_cross_bear_strength) and tk_price_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['tk_price_cross'])
                    ax.text(x = i, y = close, color = colors['tk_price_cross'],
                            s = f'_ TK Price Cross Bear\nstrength={tk_price_cross_bear_strength}'
                                f'\nlength={tk_price_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # senkou cross
            if 'senkou_cross' in crosses:
                senkou_cross_bull_strength = abs(data[i][df_idx['senkou_cross_bull_strength']])
                senkou_cross_bear_strength = abs(data[i][df_idx['senkou_cross_bear_strength']])
                senkou_cross_length_bull = data[i][df_idx['senkou_cross_bull_length']]
                senkou_cross_length_bear = data[i][df_idx['senkou_cross_bear_length']]
                
                if not np.isnan(senkou_cross_bull_strength) and senkou_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['senkou_cross'])
                    ax.text(x = i, y = close, color = colors['senkou_cross'],
                            s = f'^ Senkou Cross Bull\nstrength={senkou_cross_bull_strength}'
                                f'\nlength={senkou_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(senkou_cross_bear_strength) and senkou_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['senkou_cross'])
                    ax.text(x = i, y = close, color = colors['senkou_cross'],
                            s = f'_ Senkou Cross Bear\nstrength={senkou_cross_bear_strength}'
                                f'\nlength={senkou_cross_length_bear}{filler}')
                    vert_occupied = True
                
            # chikou cross
            if 'chikou_cross' in crosses:
                chikou_cross_bull_strength = abs(data[i][df_idx['chikou_cross_bull_strength']])
                chikou_cross_bear_strength = abs(data[i][df_idx['chikou_cross_bear_strength']])
                chikou_cross_length_bull = data[i][df_idx['chikou_cross_bull_length']]
                chikou_cross_length_bear = data[i][df_idx['chikou_cross_bear_length']]
                
                if not np.isnan(chikou_cross_bull_strength) and chikou_cross_bull_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['chikou_cross'])
                    ax.text(x = i, y = close, color = colors['chikou_cross'],
                            s = f'^ Chikou Cross Bull\nstrength={chikou_cross_bull_strength}'
                                f'\nlength={chikou_cross_length_bull}{filler}')
                    vert_occupied = True

                if not np.isnan(chikou_cross_bear_strength) and chikou_cross_bear_strength > 0:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['chikou_cross'])
                    ax.text(x = i, y = close, color = colors['chikou_cross'],
                            s = f'_ Chikou Cross Bear\nstrength={chikou_cross_bear_strength}'
                                f'\nlength={chikou_cross_length_bear}{filler}')
                    vert_occupied = True
            
            # kumo breakout
            if 'kumo_breakout' in crosses:
                cloud_breakout_bull = data[i][df_idx['cloud_breakout_bull']]
                cloud_breakout_bear = data[i][df_idx['cloud_breakout_bear']]
                
                if cloud_breakout_bull:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['kumo_breakout'])
                    ax.text(x = i, y = close, color = colors['kumo_breakout'], s = f'^ Kumo Breakout Bullish{filler}')
                    vert_occupied = True

                if cloud_breakout_bear:
                    if vert_occupied:
                        filler += '\n'*3
                    ax.axvline(x = i, color = colors['kumo_breakout'])
                    ax.text(x = i, y = close, color = colors['kumo_breakout'], s = f'_ Kumo Breakout Bearish{filler}')
                    vert_occupied = True
        

def add_rsi_to_plot(ax, df):
    ax.plot(df.momentum_rsi.to_list(), label='RSI', color='purple')
    ax.plot([30]*len(df),color='gray',alpha=0.5)
    ax.plot([70]*len(df),color='gray',alpha=0.5)
    ax.fill_between(np.arange(len(df)),[30]*len(df),[70]*len(df),color='gray',alpha=0.2)
    ax.set_ylim(15,85)
    ax.set_yticks(np.arange(20,100,20))

def add_labels_to_plot(ax, all_feat_df, labels_df, plot_range, labels=None):
    if labels is None:
        labels = set(['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
                      'second_decision', 'ticks_till_best_profit_second_decision', 'best_profit_second_decision', 'profit_peak_second_decision',
                      'decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred'])
    else:
        labels = set(labels)
    
    colors = {
        'buy': 'green',
        'sell': 'red',
    }
    
    start, stop = plot_range
    plot_data_len = stop-start+1
    
    feat_data = all_feat_df.to_numpy()
    labels_data = labels_df.to_numpy()
    feat_df_idx = {all_feat_df.columns[i]: i for i in range(len(all_feat_df.columns))}
    labels_df_idx = {labels_df.columns[i]: i for i in range(len(labels_df.columns))}
    
    verts_occupied = {}
    for i in range(plot_data_len):
        labels_i = i + start
        close = feat_data[labels_i][feat_df_idx['Close']]            
        
        # the 1st and 2nd decisions should never occupy the same vert
        printed_causes = False
        for label_name in ['first_decision', 'second_decision', 'decision_pred']: 
            decision = None if label_name not in labels else labels_data[labels_i][labels_df_idx[label_name]]
            if not pd.isnull(decision):
                decision_type = 'true'
                if label_name == 'decision_pred':
                    decision_type = 'prediction'
                
                lines = 2
                filler = '\n'
                if i in verts_occupied:
                    filler = '\n' * (verts_occupied[i] + 1)
                
                color = colors[decision]
                txt = [f'{filler}---------------------------------------',
                       f'{decision_type} {label_name}: {decision}']

                if f'best_profit_{label_name}' in labels:
                    profit = labels_data[labels_i][labels_df_idx[f'best_profit_{label_name}']] 
                    txt.append(f'best profit: {profit}')
                    lines+=1

                    if f'profit_peak_{label_name}' in labels:
                        peak_idx = int(labels_data[labels_i][labels_df_idx[f'profit_peak_{label_name}']])
                        plot_idx = peak_idx - start
                        txt.append(f'best profit datetime: {feat_data[peak_idx][feat_df_idx["datetime"]].strftime("%Y-%m-%d %H:%M")}')
                        lines+=1

                        if plot_idx < plot_data_len:
                            peak_close = feat_data[peak_idx][feat_df_idx['Close']]   
                            ax.plot(plot_idx, peak_close, marker='o', markersize=12, color='black')
                            filler_2 = '\n'
                            if plot_idx in verts_occupied:
                                filler_2 = ' \n' * (verts_occupied[plot_idx] + 1)
                                verts_occupied[plot_idx] += 2
                            else:
                                verts_occupied[plot_idx] = 2
                            ax.text(x=plot_idx, y=peak_close, color=color, verticalalignment='top',
                                    s=f'{filler_2}closed {decision_type} {decision} from '
                                      f'{feat_data[labels_i][feat_df_idx["datetime"]].strftime("%Y-%m-%d %H:%M")}\nprofit: {profit}')

                if f'ticks_till_best_profit_{label_name}' in labels:
                    ticks = int(labels_data[labels_i][labels_df_idx[f'ticks_till_best_profit_{label_name}']]) 
                    txt.append(f'ticks till best: {ticks}')
                    lines+=1

                if 'causes' in labels and not printed_causes:
                    causes = labels_data[labels_i][labels_df_idx['causes']] 
                    txt.append(f'causes: {causes}')
                    printed_causes = True
                    lines+=1
                
                txt = '\n'.join(txt)
                ax.plot(i, close, marker='o', markersize=12, color='black')
                ax.text(x=i, y=close, color=color, verticalalignment='top',
                        s=txt)
            
                if i in verts_occupied:
                    verts_occupied[i] += lines
                else:
                    verts_occupied[i] = lines

def add_extra_data_to_plot(ax, extra_df, plot_range):
    start, stop = plot_range
    extra_df = extra_df.iloc[start:stop+1]
    for col in extra_df:
        ax.plot(extra_df[col].to_numpy(), label=col)

# trying loading data from mt5 terminal w/ ForexMachine package

In [None]:
tick_data_filepath = research.download_mt5_data("EURUSD", 'H1', '2012-01-02', '2020-06-06')

In [None]:
indicators_info = {
    'ichimoku': {
        'tenkan_period': 9,
        'kijun_period': 26,
        'chikou_period': 26,
        'senkou_b_period': 24
    },
    'rsi': {
        'periods': 14
    }
}
data_with_indicators_2 = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                        indicators_info=indicators_info, 
                                                        datetime_col='datetime')
data_with_ichi_2 = research.add_ichimoku_features(data_with_indicators_2)
data_with_ichi_2.tail(10)

In [None]:
crosses = ['tk_cross', 'tk_price_cross', 'senkou_cross', 'chikou_cross', 'kumo_breakout']
crosses = ['kumo_breakout']
show_data_from_range(data_with_ichi_2, '2019-01-01', '2019-02-04', main_indicator='ichimoku', sub_indicators=['rsi'], visualize_crosses=True, crosses=crosses)

In [None]:
filepath = research.save_data_with_indicators(data_with_ichi_2,filename=f'ichimoku_sigs-{tick_data_filepath.stem}')
str(filepath)

# test preprocessing funcs from ForexMachine package

In [4]:
# trades = {
#     1: {
#         'decision_label': 1,
#         'lots': 0.11,
#         'open_price': 1.22176,
#     },
#     2: {
#         'decision_label': 1,
#         'lots': 0.76,
#         'open_price': 1.22175,
#     },
#     3: {
#         'decision_label': 1,
#         'lots': 0.14,
#         'open_price': 1.22175,
#     },
#     4: {
#         'decision_label': 0,
#         'lots': 1.28,
#         'open_price': 1.22169,
#     },
#     5: {
#         'decision_label': 0,
#         'lots': 0.55,
#         'open_price': 1.22167,
#     },
# }

# get_margin(trades, buy_label=1, sell_label=0, contract_size=100000, leverage=1000, tradersway_commodity=False, in_quote_currency=True, hedged_margin=50000)

trades = {
    1: {
        'decision_label': 1,
        'lots': 1.14,
        'open_price': 1.27019,
    },
    2: {
        'decision_label': 0,
        'lots': 0.14,
        'open_price': 1.27008,
    },
    3: {
        'decision_label': 0,
        'lots': 0.51,
        'open_price': 1.27011,
    },
}

research.get_margin(trades, buy_label=1, sell_label=0, contract_size=100000, leverage=1000, tradersway_commodity=False, in_quote_currency=False, hedged_margin=50000,
                    trade_indices=None)

81.49999999999999

# XGBoost model

In [13]:
import xgboost as xgb

#### try out different models w/ diff hyperparams

In [None]:
"""
XGBoost param tuning guide:
https://towardsdatascience.com/fine-tuning-xgboost-in-python-like-a-boss-b4543ed8b1e
"""

contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
in_quote_currency = True
pip_resolution = 0.0001
labels_dict = {1: 'buy', 0: 'sell'}

profit_percentages = [(pp/1000,pp/1000) for pp in range(1,101,3)]

param_grid = {
    'ichi_settings': [(9,26,52),(8,22,24),(9,30,60)],
    'labeling_params': [{
        'label_non_signals': [False],
        'profit_percentages': profit_percentages,
        'lots_per_trade': [0.2],
    }],
    'xgboost_params': [{
        'n_estimators': [3000],
        'max_depth': [2],
        'learning_rate': [0.1],
        'subsample': [1],
        'colsample_bytree': [1],
        'gamma': [1]
    }]
}

param_grid = ParameterGrid(param_grid)
param_grid = random.sample(list(param_grid), len(param_grid))

In [None]:
filepath = research.download_mt5_data("EURUSD", 'H1', '2012-01-02', '2020-12-18')
train_split = 0.7
results = []
best_params_first_decision = None
best_score_first_decision = None
best_params_second_decision = None
best_score_second_decision = None
num_class = 3 # buy, sell, wait
signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']

start_time = time.time()
for i, params in enumerate(param_grid):
    ichi_settings = params['ichi_settings']
    labeling_params = params['labeling_params']
    xgboost_params = params['xgboost_params']
    
    labeling_params = ParameterGrid(labeling_params)
    labeling_params = random.sample(list(labeling_params), len(labeling_params))
    xgboost_params = ParameterGrid(xgboost_params)
    xgboost_params = random.sample(list(xgboost_params), len(xgboost_params))
    
    indicators_info = {
        'ichimoku': {
            'tenkan_period': ichi_settings[0],
            'kijun_period': ichi_settings[1],
            'chikou_period': ichi_settings[1],
            'senkou_b_period': ichi_settings[2]
        },
        'rsi': {
            'periods': 14
        }
    }
    
    # load in and split data
    
    data_with_ta_indicators = research.add_indicators_to_raw(filepath=filepath, 
                                                             indicators_info=indicators_info, 
                                                             datetime_col='datetime')
    data_with_ichi_signals = research.add_ichimoku_features(data_with_ta_indicators)
    start_idx, end_idx = no_missing_data_idx_range(data_with_ichi_signals, early_ending_cols=['chikou_span_visual'])
    data_with_ichi_signals = data_with_ichi_signals[start_idx:].reset_index(drop=True)
    
    if train_split > 1:
        print(f'train_split ({train_split}) is greater than 1, stopping.')
    
    train_p = train_split
    num_rows = len(data_with_ichi_signals)
    train_data_count = int(train_p * num_rows)
    
    train_data_orig = data_with_ichi_signals.iloc[:train_data_count]
    validation_data_orig = data_with_ichi_signals.iloc[train_data_count:]
    
    for j, label_params in enumerate(labeling_params):
        label_non_signals = label_params['label_non_signals']
        min_profit_percent, profit_noise_percent = label_params['profit_percentages']
        lots_per_trade = label_params['lots_per_trade']
    
        # generate labels for data

        train_data_labels = generate_ichimoku_labels(train_data_orig, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                                     profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                                     contract_size=contract_size, lots_per_trade=lots_per_trade,
                                                     in_quote_currency=in_quote_currency,pip_resolution=pip_resolution, print_debug=False)
        validation_data_labels = generate_ichimoku_labels(validation_data_orig, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                                          profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                                          contract_size=contract_size, lots_per_trade=lots_per_trade,
                                                          in_quote_currency=in_quote_currency, pip_resolution=pip_resolution, print_debug=False)
        
        train_data = apply_perc_change(train_data_orig, cols=pc_cols, limit=1)
        start_idx, end_idx = no_missing_data_idx_range(train_data, early_ending_cols=['chikou_span_visual'])
        train_data = train_data.iloc[start_idx:end_idx+1]
        train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]
        
        validation_data = apply_perc_change(validation_data_orig, cols=pc_cols, limit=1)
        start_idx, end_idx = no_missing_data_idx_range(validation_data, early_ending_cols=['chikou_span_visual'])
        validation_data = validation_data.iloc[start_idx:end_idx+1]
        validation_data_labels = validation_data_labels.iloc[start_idx:end_idx+1]

        x_train_first_decisions, y_train_first_decisions = missing_labels_preprocess(train_data, train_data_labels, 'first_decision')
        x_valid_first_decisions, y_valid_first_decisions = missing_labels_preprocess(validation_data, validation_data_labels, 'first_decision')
        x_train_first_decisions_profits, y_train_first_decisions_profits = missing_labels_preprocess(train_data, train_data_labels, 
                                                                                                     'best_profit_first_decision')
        x_valid_first_decisions_profits, y_valid_first_decisions_profits = missing_labels_preprocess(validation_data, validation_data_labels, 
                                                                                                     'best_profit_first_decision')

        x_train_second_decisions, y_train_second_decisions = missing_labels_preprocess(train_data, train_data_labels, 'second_decision')
        x_valid_second_decisions, y_valid_second_decisions = missing_labels_preprocess(validation_data, validation_data_labels, 'second_decision')
        x_train_second_decisions_profits, y_train_second_decisions_profits = missing_labels_preprocess(train_data, train_data_labels, 
                                                                                                       'best_profit_second_decision')
        x_valid_second_decisions_profits, y_valid_second_decisions_profits = missing_labels_preprocess(validation_data, validation_data_labels, 
                                                                                                       'best_profit_second_decision')

        # generate predictions w/ XGBoost model
        for k, xgb_params in enumerate(xgboost_params):
            n_estimators = xgb_params['n_estimators']
            max_depth = xgb_params['max_depth']
            learning_rate = xgb_params['learning_rate']
            subsample = xgb_params['subsample']
            colsample_bytree = xgb_params['colsample_bytree']
            gamma = xgb_params['gamma']
            
            if min_profit_percent==profit_noise_percent:
                # binrary classification problem (buy or sell)
                error_metric_name = 'error'
                xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'binary:logistic', 'eval_metric': error_metric_name, 
                              'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
            else:
                # multi-class classification problem (buy, sell, or wiat)
                error_metric_name = 'merror'
                xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'multi:softmax', 'num_class': num_class,
                              'eval_metric': error_metric_name, 'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
            
            ### first decisions

            y_train_true, labels_dict = convert_class_labels(y_train_first_decisions, labels_dict=labels_dict)
            y_valid_true, labels_dict = convert_class_labels(y_valid_first_decisions, labels_dict=labels_dict)

            dtrain = xgb.DMatrix(x_train_first_decisions, label=y_train_true)
            dvalidation = [(xgb.DMatrix(x_train_first_decisions, label=y_train_true),'train'), 
                           (xgb.DMatrix(x_valid_first_decisions, label=y_valid_true),'validation')]
            dtest = xgb.DMatrix(x_valid_first_decisions)
            
            evals_result = {}
            decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, 
                                           evals_result=evals_result, verbose_eval=False)
            
            train_error = evals_result['train'][error_metric_name][-1]
            train_accuracy_first_decision = 1 - train_error
            
            validation_error = evals_result['validation'][error_metric_name][-1]
            validation_accuracy_first_decision = 1 - validation_error
            
            y_test_probs = decision_predictor.predict(dtest)
            y_test_preds = np.around(y_test_probs)
            y_test_preds = pd.DataFrame(y_test_preds, columns=y_valid_true.columns)
            y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]
            p_profits_first_decision = potention_profits(y_valid_first_decisions, y_test_preds, y_valid_first_decisions_profits)

            ### second decisions

            y_train_true, labels_dict = convert_class_labels(y_train_second_decisions, labels_dict=labels_dict)
            y_valid_true, labels_dict = convert_class_labels(y_valid_second_decisions, labels_dict=labels_dict)

            dtrain = xgb.DMatrix(x_train_second_decisions, label=y_train_true)
            dvalidation = [(xgb.DMatrix(x_train_second_decisions, label=y_train_true),'train'), 
                           (xgb.DMatrix(x_valid_second_decisions, label=y_valid_true),'validation')]
            dtest = xgb.DMatrix(x_valid_second_decisions)
            
            evals_result = {}
            decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, 
                                           evals_result=evals_result, verbose_eval=False)

            train_error = evals_result['train'][error_metric_name][-1]
            train_accuracy_second_decision = 1 - train_error
            
            validation_error = evals_result['validation'][error_metric_name][-1]
            validation_accuracy_second_decision = 1 - validation_error
            
            y_test_probs = decision_predictor.predict(dtest)
            y_test_preds = np.around(y_test_probs)
            y_test_preds = pd.DataFrame(y_test_preds, columns=y_valid_true.columns)
            y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]
            p_profits_second_decision = potention_profits(y_valid_second_decisions, y_test_preds, y_valid_second_decisions_profits)
        
            all_params = {
                'tenkan_period': ichi_settings[0],
                'kijun_period': ichi_settings[1],
                'chikou_period': ichi_settings[1],
                'senkou_b_period': ichi_settings[2],
                'label_non_signals': label_non_signals,
                'min_profit_percent': min_profit_percent,
                'profit_noise_percent': profit_noise_percent,
                'lots_per_trade': lots_per_trade,
                'n_estimators': n_estimators,
                'max_depth': max_depth,
                'learning_rate': learning_rate,
                'subsample': subsample,
                'colsample_bytree': colsample_bytree,
                'gamma': gamma,
                'train_accuracy_first_decision': train_accuracy_first_decision,
                'validation_accuracy_first_decision': validation_accuracy_first_decision,
                'train_accuracy_second_decision': train_accuracy_second_decision,
                'validation_accuracy_second_decision': validation_accuracy_second_decision,
                'potention_profits_first_decision': p_profits_first_decision,
                'potention_profits_second_decision': p_profits_second_decision
            }
            
            first_decision_score = validation_accuracy_first_decision
            second_decision_score = validation_accuracy_second_decision
            
            if not best_score_first_decision or first_decision_score > best_score_first_decision:
                best_score_first_decision = first_decision_score
                best_params_first_decision = all_params
                
            if not best_score_second_decision or second_decision_score > best_score_second_decision:
                best_score_second_decision = second_decision_score
                best_params_second_decision = all_params
            
            results.append(all_params)
            
            print('--------------------------------------------------------------------')
            print(f'{k+1}/{len(xgboost_params)} xgb params evaulated')
            print('--------------------------------------------------------------------\n')
            print(f'last params evaluated:')
            print(f'{all_params}\n')
            print(f'best first decision params evaluated:')
            print(f'{best_params_first_decision}\n')
            print(f'best second decision params evaluated:')
            print(f'{best_params_second_decision}\n')

        print('--------------------------------------------------------------------')
        print(f'{j+1}/{len(labeling_params)} labeling params evaulated')
        print('--------------------------------------------------------------------\n')
        
        results_sorted = sorted(results, key=lambda d: d['validation_accuracy_first_decision'], reverse=True)
        results_sorted_df = pd.DataFrame(results_sorted)
        results_sorted_df.to_csv('../my_stuff/grid_search_results.csv')
        
    print('--------------------------------------------------------------------')
    print(f'{i+1}/{len(param_grid)} ichimoku settings evaulated')
    print('--------------------------------------------------------------------\n')
    
results_sorted = sorted(results, key=lambda d: d['validation_accuracy_first_decision'], reverse=True)
results_sorted_df = pd.DataFrame(results_sorted)
results_sorted_df.to_csv('../my_stuff/grid_search_results.csv')
print(f'runtime: {(time.time()-start_time)/60} min')

#### train model for backtesting

In [33]:
# hyperparameters

label_non_signals = False
min_profit_percent, profit_noise_percent = 0.01, 0.01
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
currency_side = 'right'
in_quote_currency = True if currency_side == 'right' else False
pip_resolution = 0.0001

labels_dict = {1: 'buy', 0: 'sell'}
n_estimators = 3000
max_depth = 2
learning_rate = 0.1
subsample = 1
colsample_bytree = 1
gamma = 1
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
indicators_info = {
    'ichimoku': {
        'tenkan_period': tenkan_period,
        'kijun_period': kijun_period,
        'chikou_period': kijun_period,
        'senkou_b_period': senkou_b_period
    },
    'rsi': {
        'periods': 14
    }
}

signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
sigs_for_filename = 'cb-tk-tkp-sen-chi'

# get data

cur_pair = 'EURUSD'
timeframe = 'H1'
tick_data_filepath = research.download_mt5_data(cur_pair, timeframe, global_train_data_range_start, global_train_data_range_end)
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')
train_data = research.add_ichimoku_features(data_with_indicators)

train_data_labels = generate_ichimoku_labels(train_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
train_data = apply_perc_change(train_data, cols=pc_cols, limit=1)
start_idx, end_idx = no_missing_data_idx_range(train_data)
train_data = train_data.iloc[start_idx:end_idx+1]
train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]

x_train_first_decisions, y_train_first_decisions = missing_labels_preprocess(train_data, train_data_labels, 'first_decision')

loaded 60431 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv
saved 60431 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv, done.


In [34]:
# train model
y_train_true, labels_dict = convert_class_labels(y_train_first_decisions, labels_dict=labels_dict)

dtrain = xgb.DMatrix(x_train_first_decisions, label=y_train_true)
dvalidation = [(xgb.DMatrix(x_train_first_decisions, label=y_train_true),'train')]

if min_profit_percent==profit_noise_percent:
    # binrary classification problem (buy or sell)
    error_metric_name = 'error'
    xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'binary:logistic', 'eval_metric': error_metric_name, 'gamma':gamma,
                  'colsample_bytree':colsample_bytree, 'subsample':subsample}
else:
    # multi-class classification problem (buy, sell, or wiat)
    error_metric_name = 'merror'
    xgb_params = {'max_depth':max_depth, 'learning_rate':learning_rate, 'objective':'multi:softmax', 'num_class': num_class,
                  'eval_metric': error_metric_name, 'gamma':gamma, 'colsample_bytree':colsample_bytree, 'subsample':subsample}
evals_result = {}
xgb_first_decision_predictor = xgb.train(xgb_params, dtrain, num_boost_round=n_estimators, evals=dvalidation, evals_result=evals_result)

# print train error
train_error = evals_result['train']['error'][-1]
print(f'\ntrain error: {train_error}')
print(f'train accuracy: {1 - train_error}')

# save model
xgb_first_decision_predictor.save_model(f'../my_stuff/{cur_pair}-{timeframe}_{min_profit_percent}-min_profit_{lots_per_trade}-lots_{currency_side}-cur_side'
                                        f'_{tenkan_period}-{kijun_period}-{senkou_b_period}-{sigs_for_filename}-ichi_xgb_classifier.json')

[0]	train-error:0.45984
[1]	train-error:0.45647
[2]	train-error:0.45815
[3]	train-error:0.45771
[4]	train-error:0.45984
[5]	train-error:0.45771
[6]	train-error:0.45771
[7]	train-error:0.45753
[8]	train-error:0.45771
[9]	train-error:0.45584
[10]	train-error:0.45771
[11]	train-error:0.45815
[12]	train-error:0.45735
[13]	train-error:0.45718
[14]	train-error:0.45744
[15]	train-error:0.45744
[16]	train-error:0.45744
[17]	train-error:0.45620
[18]	train-error:0.45593
[19]	train-error:0.45522
[20]	train-error:0.45238
[21]	train-error:0.45274
[22]	train-error:0.45354
[23]	train-error:0.45327
[24]	train-error:0.45336
[25]	train-error:0.45425
[26]	train-error:0.45363
[27]	train-error:0.45300
[28]	train-error:0.45150
[29]	train-error:0.45123
[30]	train-error:0.44732
[31]	train-error:0.44697
[32]	train-error:0.44510
[33]	train-error:0.44555
[34]	train-error:0.44484
[35]	train-error:0.44413
[36]	train-error:0.44457
[37]	train-error:0.44448
[38]	train-error:0.44546
[39]	train-error:0.44413
[40]	train

In [35]:
# test model on test data
tick_data_filepath = research.download_mt5_data("EURUSD", 'H1', '2020-10-02', '2020-12-18')
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')
test_data = research.add_ichimoku_features(data_with_indicators)

test_data_labels = generate_ichimoku_labels(test_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

test_data = apply_perc_change(test_data, cols=pc_cols, limit=1)
start_idx, end_idx = no_missing_data_idx_range(test_data, early_ending_cols=['chikou_span_visual'])
test_data = test_data.iloc[start_idx:end_idx+1]
test_data_labels = test_data_labels.iloc[start_idx:end_idx+1]

x_test_first_decisions, y_test_first_decisions = missing_labels_preprocess(test_data, test_data_labels, 'first_decision')
x_test_first_decisions_profits, y_test_first_decisions_profits = missing_labels_preprocess(test_data, test_data_labels, 'best_profit_first_decision')

y_test_true, labels_dict = convert_class_labels(y_test_first_decisions, to_numpy=True, labels_dict=labels_dict)

dtest = xgb.DMatrix(x_test_first_decisions)
y_test_probs = xgb_first_decision_predictor.predict(dtest)

y_test_preds = np.around(y_test_probs)
y_test_preds = pd.DataFrame(y_test_preds, columns=y_test_first_decisions.columns)
y_test_preds = convert_class_labels(y_test_preds, to_ints=False, labels_dict=labels_dict)[0]

# print results
test_error, test_wrong_indices = error_rate(y_test_first_decisions, y_test_preds)
p_profits_first_decision = potention_profits(y_test_first_decisions, y_test_preds, y_test_first_decisions_profits)

print(f'\ntest error: {test_error}')
print(f'test accuracy: {1 - test_error}')
print(f'potential profits from test data: {p_profits_first_decision}')
print(f'buy/sell counts:\n{y_test_preds["first_decision"].value_counts()}')

x = x_test_first_decisions.to_numpy()
ytp = y_test_preds["first_decision"].to_numpy()

sell_inputs = []
for i in range(len(ytp)):
    if ytp[i] == 'sell':
        sell_inputs.append(x[i])

loaded 1320 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2020-10-02T00;00UTC_to_2020-12-18T00;00UTC.csv
saved 1320 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2020-10-02T00;00UTC_to_2020-12-18T00;00UTC.csv, done.

test error: 0.32158590308370044
test accuracy: 0.6784140969162995
potential profits from test data: 81816.39999999983
buy/sell counts:
buy     193
sell     34
Name: first_decision, dtype: int64


#### analyze binary probs

In [36]:
print(labels_dict,'\n')
test_wrong_indices = set(test_wrong_indices)
y_test_preds_np = np.around(y_test_probs)
wrong_probs_diffs = []
correct_probs_diffs = []
for i in range(len(y_test_probs)):
    if i in test_wrong_indices:
        wrong_probs_diffs.append(abs(y_test_preds_np[i]-y_test_probs[i]))
        print(f'WRONG: true label={y_test_true[i]}, prob={y_test_probs[i]},{wrong_probs_diffs[-1]}')
    else:
        correct_probs_diffs.append(abs(y_test_true[i]-y_test_probs[i]))
        print(f'CORRECT: true label={y_test_true[i]}, prob={y_test_probs[i]},{correct_probs_diffs[-1]}')

fig, ax = plt.subplots()
ax.hist(wrong_probs_diffs, color='red', density=True)
ax.set_title("histogram of differences between wrong labels and XGB model probs")
plt.show()

fig, ax = plt.subplots()
ax.hist(correct_probs_diffs, color='green', density=True)
ax.set_title("histogram of differences between correct labels and XGB model probs")
plt.show()

{1: 'buy', 0: 'sell'} 

CORRECT: true label=1, prob=0.5895295143127441,0.41047048568725586
CORRECT: true label=1, prob=0.5297814607620239,0.4702185392379761
CORRECT: true label=1, prob=0.545541524887085,0.45445847511291504
CORRECT: true label=1, prob=0.5556892156600952,0.4443107843399048
CORRECT: true label=1, prob=0.5123186707496643,0.4876813292503357
WRONG: true label=0, prob=0.5582850575447083,0.44171494245529175
WRONG: true label=0, prob=0.5660315752029419,0.4339684247970581
WRONG: true label=0, prob=0.5056567788124084,0.49434322118759155
WRONG: true label=0, prob=0.5227710008621216,0.4772289991378784
WRONG: true label=0, prob=0.5327770709991455,0.4672229290008545
WRONG: true label=0, prob=0.5528644919395447,0.4471355080604553
WRONG: true label=0, prob=0.6500375866889954,0.34996241331100464
WRONG: true label=0, prob=0.5231617093086243,0.47683829069137573
CORRECT: true label=1, prob=0.5124702453613281,0.4875297546386719
WRONG: true label=1, prob=0.4195222854614258,0.4195222854614258

# RL w/ gym-anytrading

In [None]:
import gym
import gym_anytrading
from gym_anytrading.envs.forex_env import ForexEnv

from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK

from stable_baselines import A2C
from stable_baselines.common.vec_env import DummyVecEnv

import quantstats as qs

##### trying out sample code

In [None]:
df = gym_anytrading.datasets.STOCKS_GOOGL.copy()
df = df.drop(['Adj Close'], axis=1)

window_size = 10
start_index = window_size
end_index = len(df)

env_maker = lambda: gym.make(
    'stocks-v0',
    df = df,
    window_size = window_size,
    frame_bound = (start_index, end_index)
)

env = DummyVecEnv([env_maker])

In [None]:
i, end = no_missing_data_idx_range(data_with_ichi_2, early_ending_cols=['chikou_span_visual'])
train_df = data_with_ichi_2.iloc[i:]
train_df.set_index('datetime', inplace=True, verify_integrity=True)
categories_dict = {
    'quarter': [1,2,3,4],
    'day_of_week': [0,1,2,3,4]
}
train_df = dummy_and_remove_data(train_df, categories_dict=categories_dict, cols_to_remove=['momentum_rsi','month','day','minute','hour','year','spread'],
                                 include_defaults=False)
train_df.head()

In [None]:
class CustomForexEnv(ForexEnv):
    def _process_data(self):
        prices = self.df.loc[:, 'Close'].to_numpy()

        prices[self.frame_bound[0] - self.window_size]  # validate index (TODO: Improve validation)
        prices = prices[self.frame_bound[0]-self.window_size:self.frame_bound[1]]

        diff = np.insert(np.diff(prices), 0, 0)
        signal_features = np.column_stack((prices, diff))
        
        my_features = self.df.iloc[:,4:].to_numpy()
        signal_features = np.column_stack((signal_features, my_features))
#         print(list(signal_features[0]))

        return prices, signal_features

In [None]:
df = train_df

window_size = 10
start_index = window_size
end_index = len(df)

env_maker = lambda: CustomForexEnv(
    df = train_df,
    window_size = window_size,
    frame_bound = (start_index, end_index),
    unit_side = 'right'
)

env = DummyVecEnv([env_maker])

In [None]:
policy_kwargs = dict(net_arch=[64, 'lstm', dict(vf=[128, 128, 128], pi=[64, 64])])
model = A2C('MlpLstmPolicy', env, verbose=1, policy_kwargs=policy_kwargs)
model.learn(total_timesteps=5000)

In [None]:
env = env_maker()
observation = env.reset()
actions = []
while True:
    observation = observation[np.newaxis, ...]

    # action = env.action_space.sample()
    action, _states = model.predict(observation)
    actions.append(action)
    observation, reward, done, info = env.step(action)

    # env.render()
    if done:
        print("info:", info)
        break
print(observation)
# for action in actions:
#     print(action)
# print(len(actions))

In [None]:
%matplotlib qt
plt.figure(figsize=(16, 6))
env.render_all()
plt.show()

In [None]:
%matplotlib inline
qs.extend_pandas()

net_worth = pd.Series(env.history['total_profit'], index=df.index[start_index+1:end_index])
returns = net_worth.pct_change().iloc[1:]

qs.reports.full(returns)
qs.reports.html(returns, output='a2c_quantstats.html')

# CNN Bi-LSTM

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [7]:
print(f'is GPU available for TF: {tf.test.is_gpu_available()}\n')

gpu_devices = tf.config.list_physical_devices('GPU')
print(f'GPU devices: {gpu_devices}\n')

all_devices = tf.config.list_physical_devices()
print(f'all devices: {all_devices}')

if len(gpu_devices) > 0:
    for device in gpu_devices: 
        tf.config.experimental.set_memory_growth(device, True)

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
is GPU available for TF: True

GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

all devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU')]


#### hyperparameters

In [16]:
seq_len = 128
num_epochs = 400
fast_ma_window = 3
slow_ma_window = 7
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
cur_pair = 'EURUSD'
timeframe = 'H1'
indicators_info = {
    'ichimoku': {
        'tenkan_period': tenkan_period,
        'kijun_period': kijun_period,
        'chikou_period': kijun_period,
        'senkou_b_period': senkou_b_period
    },
    'rsi': {
        'periods': 14
    }
}

ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',]
#            'trend_ichimoku_base','trend_ichimoku_conv',
#            'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
#                         ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
#                         ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength

train_perc = 0.8
val_perc = (1-train_perc)/2
test_perc = val_perc
split_percents = (val_perc, test_perc)

#### get data and preprocess

In [None]:
tick_data_filepath = research.download_mt5_data(cur_pair, timeframe, global_train_data_range_start, global_train_data_range_end)
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath,
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')
data_with_ichi_sigs = research.add_ichimoku_features(data_with_indicators)

all_data = missing_labels_preprocess(data_with_ichi_sigs,None,None)[0]
all_data_orig = all_data

In [None]:
fast_ma_data = research.get_split_lstm_data(all_data, ma_window=fast_ma_window, seq_len=seq_len, split_percents=split_percents, fully_divisible_batch_sizes=True,
                                             normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000)
slow_ma_data = research.get_split_lstm_data(all_data, ma_window=slow_ma_window, seq_len=seq_len, split_percents=split_percents, fully_divisible_batch_sizes=True,
                                             normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000)

x_train_fast_ma, y_train_fast_ma = fast_ma_data['train_data_np']
x_val_fast_ma, y_val_fast_ma = fast_ma_data['val_data_np']
x_test_fast_ma, y_test_fast_ma = fast_ma_data['test_data_np']

x_train_slow_ma, y_train_slow_ma = slow_ma_data['train_data_np']
x_val_slow_ma, y_val_slow_ma = slow_ma_data['val_data_np']
x_test_slow_ma, y_test_slow_ma = slow_ma_data['test_data_np']

# process orignal price data for plotting comparison

all_data_orig = apply_perc_change(all_data_orig, cols=pc_cols)
all_data_orig.dropna(how='any', axis=0, inplace=True) # drop any NA rows due to applying percentage change

train_data_df_orig = all_data_orig.iloc[:fast_ma_data['train_data_df'].index[-1]+1]
val_data_df_orig = all_data_orig.iloc[fast_ma_data['train_data_df'].index[-1]+1:fast_ma_data['val_data_df'].index[-1]+1]
test_data_df_orig = all_data_orig.iloc[fast_ma_data['val_data_df'].index[-1]+1:]

train_data_df_orig, normalization_terms_2 = normalize_data(train_data_df_orig, train_data=True, groups=normalization_groups)   
val_data_df_orig, normalization_terms_2 = normalize_data(val_data_df_orig, train_data=False, normalization_terms=normalization_terms_2)
test_data_df_orig = normalize_data(test_data_df_orig, train_data=False, normalization_terms=normalization_terms_2)[0]

train_data_orig = train_data_df_orig.to_numpy()
val_data_orig = val_data_df_orig.to_numpy()
test_data_orig = test_data_df_orig.to_numpy()

In [None]:
for tup in ((fast_ma_data, 'Fast MA'), (slow_ma_data, 'Slow MA')):
    data, marker = tup
    
    train_data_df = data['train_data_df']
    val_data_df = data['val_data_df']
    test_data_df = data['test_data_df']

    train_data = train_data_df.to_numpy()
    val_data = val_data_df.to_numpy()
    test_data = test_data_df.to_numpy()

    fig = plt.figure(figsize=(15,10))
    st = fig.suptitle(f'{marker} Data Separation', fontsize=20)
    st.set_y(0.92)

    ###############################################################################

    ax1 = fig.add_subplot(211)
    ax1.plot(np.arange(train_data.shape[0]), train_data_df['Close'], label='Training data')

    ax1.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), val_data_df['Close'], label='Validation data')

    ax1.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), test_data_df['Close'], label='Test data')
    ax1.set_xlabel('Date')
    ax1.set_ylabel(f'{marker} Normalized Closing Returns')

    ###############################################################################

    ax2 = fig.add_subplot(212)
    ax2.plot(np.arange(train_data.shape[0]), train_data_df['Volume'], label='Training data')

    ax2.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), val_data_df['Volume'], label='Validation data')

    ax2.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), test_data_df['Volume'], label='Test data')
    ax2.set_xlabel('Date')
    ax2.set_ylabel(f'{marker} Normalized Volume Changes')

    plt.legend(loc='best')

#### define model

In [226]:
def Inception_A(layer_in, c7):
    branch1x1_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch1x1 = layers.BatchNormalization()(branch1x1_1)
    branch1x1 = layers.ReLU()(branch1x1)

    branch5x5_1 = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(layer_in)
    branch5x5 = layers.BatchNormalization()(branch5x5_1)
    branch5x5 = layers.ReLU()(branch5x5)
    branch5x5 = layers.Conv1D(c7, kernel_size=5, padding='same', use_bias=False)(branch5x5)
    branch5x5 = layers.BatchNormalization()(branch5x5)
    branch5x5 = layers.ReLU()(branch5x5)  

    branch3x3_1 = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(layer_in)
    branch3x3 = layers.BatchNormalization()(branch3x3_1)
    branch3x3 = layers.ReLU()(branch3x3)
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding='same', use_bias=False)(branch3x3)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3)
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding='same', use_bias=False)(branch3x3)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3) 

    branch_pool = layers.AveragePooling1D(pool_size=(3), strides=1, padding='same')(layer_in)
    branch_pool = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(branch_pool)
    branch_pool = layers.BatchNormalization()(branch_pool)
    branch_pool = layers.ReLU()(branch_pool)
    outputs = layers.Concatenate(axis=-1)([branch1x1, branch5x5, branch3x3, branch_pool])
    return outputs


def Inception_B(layer_in, c7):
    branch3x3 = layers.Conv1D(c7, kernel_size=3, padding="same", strides=2, use_bias=False)(layer_in)
    branch3x3 = layers.BatchNormalization()(branch3x3)
    branch3x3 = layers.ReLU()(branch3x3)  

    branch3x3dbl = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)  
    branch3x3dbl = layers.Conv1D(c7, kernel_size=3, padding="same", use_bias=False)(branch3x3dbl)  
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)  
    branch3x3dbl = layers.Conv1D(c7, kernel_size=3, padding="same", strides=2, use_bias=False)(branch3x3dbl)    
    branch3x3dbl = layers.BatchNormalization()(branch3x3dbl)
    branch3x3dbl = layers.ReLU()(branch3x3dbl)   

    branch_pool = layers.MaxPooling1D(pool_size=3, strides=2, padding="same")(layer_in)

    outputs = layers.Concatenate(axis=-1)([branch3x3, branch3x3dbl, branch_pool])
    return outputs


def Inception_C(layer_in, c7):
    branch1x1_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch1x1 = layers.BatchNormalization()(branch1x1_1)
    branch1x1 = layers.ReLU()(branch1x1)   

    branch7x7_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)
    branch7x7 = layers.BatchNormalization()(branch7x7_1)
    branch7x7 = layers.ReLU()(branch7x7)   
    branch7x7 = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7)
    branch7x7 = layers.BatchNormalization()(branch7x7)
    branch7x7 = layers.ReLU()(branch7x7)  
    branch7x7 = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7)  
    branch7x7 = layers.BatchNormalization()(branch7x7)
    branch7x7 = layers.ReLU()(branch7x7)   

    branch7x7dbl_1 = layers.Conv1D(c7, kernel_size=1, padding="same", use_bias=False)(layer_in)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl_1)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl) 
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(7), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  
    branch7x7dbl = layers.Conv1D(c7, kernel_size=(1), padding="same", use_bias=False)(branch7x7dbl)  
    branch7x7dbl = layers.BatchNormalization()(branch7x7dbl)
    branch7x7dbl = layers.ReLU()(branch7x7dbl)  

    branch_pool = layers.AveragePooling1D(pool_size=3, strides=1, padding='same')(layer_in)
    branch_pool = layers.Conv1D(c7, kernel_size=1, padding='same', use_bias=False)(branch_pool)
    branch_pool = layers.BatchNormalization()(branch_pool)
    branch_pool = layers.ReLU()(branch_pool)  

    outputs = layers.Concatenate(axis=-1)([branch1x1, branch7x7, branch7x7dbl, branch_pool])
    return outputs


def create_model(seq_len, num_features):
    in_seq = layers.Input(shape=(seq_len, num_features))

    x = Inception_A(in_seq, 32)
    x = Inception_A(x, 32)
    x = Inception_B(x, 32)
    x = Inception_B(x, 32)
    x = Inception_C(x, 32)
    x = Inception_C(x, 32)    

    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

    avg_pool = layers.GlobalAveragePooling1D()(x)
    max_pool = layers.GlobalMaxPooling1D()(x)
    conc = layers.concatenate([avg_pool, max_pool])
    conc = layers.Dense(64, activation="relu")(conc)
    out = layers.Dense(1, activation="sigmoid")(conc)      

    model = keras.Model(inputs=in_seq, outputs=out)
    model.compile(loss="mse", optimizer="adam", metrics=['mae', 'mape'])     
    return model

# def create_model(seq_len, num_features):
#     in_seq = layers.Input(shape = (seq_len, num_features))

#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(in_seq)
#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

#     avg_pool = layers.GlobalAveragePooling1D()(x)
#     max_pool = layers.GlobalMaxPooling1D()(x)
#     conc = layers.concatenate([avg_pool, max_pool])
#     conc = layers.Dense(64, activation="relu")(conc)
#     out = layers.Dense(1, activation="linear")(conc)      

#     model = keras.Model(inputs=in_seq, outputs=out)
#     model.compile(loss="mse", optimizer="adam", metrics=['mae', 'mape'])    
#     return model

def create_model_binary(seq_len, num_features):
#     in_seq = layers.Input(shape=(seq_len, num_features))

#     x = Inception_A(in_seq, 32)
#     x = Inception_A(x, 32)
#     x = Inception_B(x, 32)
#     x = Inception_B(x, 32)
#     x = Inception_C(x, 32)
#     x = Inception_C(x, 32)    

#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
#     x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x) 

#     avg_pool = layers.GlobalAveragePooling1D()(x)
#     max_pool = layers.GlobalMaxPooling1D()(x)
#     conc = layers.concatenate([avg_pool, max_pool])
#     conc = layers.Dense(64, activation="relu")(conc)
#     out = layers.Dense(1, activation="sigmoid")(conc)      

#     model = keras.Model(inputs=in_seq, outputs=out)
#     model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy', 'AUC']) 

    model = keras.Sequential()
    model.add(layers.Input(shape=(seq_len, num_features)))
    model.add(layers.Conv1D(filters=16, kernel_size=3, padding='same', activation='relu'))
    model.add(layers.MaxPooling1D(pool_size=2))
    model.add(layers.LSTM(64, return_sequences=False))  # should have return_sequences=False before a dense layer and true before another RNN type layer
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', 'AUC'])
    return model

#### try using model as buy/sell classifier 

In [227]:
# hyperparameters

label_non_signals = False
min_profit_percent, profit_noise_percent = 0.01, 0.01
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
in_quote_currency = True
pip_resolution = 0.0001

labels_dict = {1: 'buy', 0: 'sell'}

signals_to_consider = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                       'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                       'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                       'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                       'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
sigs_for_filename = 'cb-tk-tkp-sen-chi'

# get data

cur_pair = 'EURUSD'
timeframe = 'H1'
tick_data_filepath = research.download_mt5_data(cur_pair, timeframe, global_train_data_range_start, global_train_data_range_end)
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')
train_data = research.add_ichimoku_features(data_with_indicators)

train_data_labels = generate_ichimoku_labels(train_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                             profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
                                             contract_size=contract_size, lots_per_trade=lots_per_trade,
                                             in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

start_idx, end_idx = no_missing_data_idx_range(train_data, early_ending_cols=['chikou_span_visual'])
train_data = train_data.iloc[start_idx:end_idx+1]
train_data = dummy_and_remove_features(train_data)
train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]

loaded 60431 rows of tick data from C:\GitHub Repos\ForexMachine\Data\.cache\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv
saved 60431 rows of EURUSD h1 tick data to C:\GitHub Repos\ForexMachine\Data\RawData\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv, done.


In [229]:
data_dict = research.get_split_lstm_data(train_data, ma_window=7, seq_len=128, split_percents=split_percents, fully_divisible_batch_sizes=True,
                                          normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000,
                                          buy_sell_labels_df=train_data_labels['first_decision'], apply_pct_change=True)

x_train, y_train = data_dict['train_data_np']
x_val, y_val = data_dict['val_data_np']
x_test, y_test = data_dict['test_data_np']

binary_model = create_model_binary(seq_len=x_train.shape[1], num_features=x_train.shape[2])
    
filepath = f'../my_stuff/{cur_pair}-{timeframe}_{min_profit_percent}-min_profit_{lots_per_trade}-lots_{currency_side}-cur_side' \
            f'_{tenkan_period}-{kijun_period}-{senkou_b_period}-{sigs_for_filename}-ichi_cnn-lstm_classifier.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_accuracy', save_best_only=True, verbose=1)
                                    
binary_model.fit(convert_to_tensor(x_train), convert_to_tensor(y_train),
                  batch_size=data_dict['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  validation_data=(convert_to_tensor(x_val), convert_to_tensor(y_val)))

binary_model = tf.keras.models.load_model(filepath)

eval_results = binary_model.evaluate(convert_to_tensor(x_test), convert_to_tensor(y_test), return_dict=True)
print(eval_results)

------------------------------------------------------
data w/ moving average window of None info:

batch size for evaluation: 1297
training data size reduction for evaulation: 9079 -> 9079
batch size for final training: 1874
training data size reduction for final training: 11245 -> 11244

training data shape: x=(9079, 128, 30), y=(9079,)
validation data shape: x=(1087, 128, 30), y=(1087,)
test data shape: x=(1032, 128, 30), y=(1032,)
all train data shape: x=(11244, 128, 30), y=(11244,)
------------------------------------------------------
Epoch 1/400
Epoch 00001: val_accuracy improved from -inf to 0.40846, saving model to ../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_cnn-lstm_classifier.hdf5
Epoch 2/400
Epoch 00002: val_accuracy improved from 0.40846 to 0.45262, saving model to ../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_cnn-lstm_classifier.hdf5
Epoch 3/400
Epoch 00003: val_accuracy did no

#### try using model for close price forcasting

In [None]:
fast_ma_window = fast_ma_data['ma_window']
filepath = f'../my_stuff/{cur_pair}-{timeframe}_Bi-LSTM_{fast_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, verbose=1)

fast_ma_model = create_model(seq_len=x_train_fast_ma.shape[1], num_features=x_train_fast_ma.shape[2])
# print(fast_ma_model.summary())

start_t = time.time()

fast_ma_model.fit(convert_to_tensor(x_train_fast_ma), convert_to_tensor(y_train_fast_ma),
                  batch_size=fast_ma_data['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  validation_data=(convert_to_tensor(x_val_fast_ma), convert_to_tensor(y_val_fast_ma)))

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
slow_ma_window = slow_ma_data['ma_window']
filepath = f'../my_stuff/{cur_pair}-{timeframe}_Bi-LSTM_{slow_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, verbose=1)

slow_ma_model = create_model(seq_len=x_train_slow_ma.shape[1], num_features=x_train_slow_ma.shape[2])

start_t = time.time()

slow_ma_model.fit(convert_to_tensor(x_train_slow_ma), convert_to_tensor(y_train_slow_ma),
                  batch_size=slow_ma_data['eval_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs,
                  #shuffle=True,
                  validation_data=(convert_to_tensor(x_val_slow_ma), convert_to_tensor(y_val_slow_ma)))

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
fast_ma_model = tf.keras.models.load_model('../my_stuff/final_Bi-LSTM_fast_5_ma.hdf5')#('../my_stuff/Bi-LSTM_mov_avg_5.hdf5')

print('done loading fast ma model')

# #Calculate predication for training, validation and test data
# train_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_train_fast_ma))
# val_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_val_fast_ma))
# test_pred_fast_ma = fast_ma_model.predict(convert_to_tensor(x_test_fast_ma))

#Print evaluation metrics for all datasets
train_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_train_fast_ma), convert_to_tensor(y_train_fast_ma), verbose=0)
val_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_val_fast_ma), convert_to_tensor(y_val_fast_ma), verbose=0)
test_eval_fast_ma = fast_ma_model.evaluate(convert_to_tensor(x_test_fast_ma), convert_to_tensor(y_test_fast_ma), verbose=0)

print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval_fast_ma[0], train_eval_fast_ma[1], train_eval_fast_ma[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval_fast_ma[0], val_eval_fast_ma[1], val_eval_fast_ma[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval_fast_ma[0], test_eval_fast_ma[1], test_eval_fast_ma[2]))

In [None]:
slow_ma_model = tf.keras.models.load_model('../my_stuff/final_Bi-LSTM_slow_13_ma.hdf5')#('../my_stuff/Bi-LSTM_mov_avg_13.hdf5')

print('done loading slow ma model')

#Calculate predication for training, validation and test data
train_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_train_slow_ma))
val_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_val_slow_ma))
test_pred_slow_ma = slow_ma_model.predict(convert_to_tensor(x_test_slow_ma))

#Print evaluation metrics for all datasets
train_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_train_slow_ma), convert_to_tensor(y_train_slow_ma), verbose=0)
val_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_val_slow_ma), convert_to_tensor(y_val_slow_ma), verbose=0)
test_eval_slow_ma = slow_ma_model.evaluate(convert_to_tensor(x_test_slow_ma), convert_to_tensor(y_test_slow_ma), verbose=0)

print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval_slow_ma[0], train_eval_slow_ma[1], train_eval_slow_ma[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval_slow_ma[0], val_eval_slow_ma[1], val_eval_slow_ma[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval_slow_ma[0], test_eval_slow_ma[1], test_eval_slow_ma[2]))

In [None]:
test_data_fast_ma = fast_ma_data['test_data_df'].to_numpy()

test_data_slow_ma = slow_ma_data['test_data_df'].to_numpy()

fig = plt.figure(figsize=(15,15))
st = fig.suptitle("CNN + Bi-LSTM Model", fontsize=22)
st.set_y(1.02)

# #Plot training data results
# ax11 = fig.add_subplot(311)
# ax11.plot(train_data[seq_len:, 3], label='EURUSD Closing Returns')
# ax11.plot(train_pred, color='yellow', linewidth=3, label='Predicted EURUSD Closing Returns')
# ax11.set_title("Training Data", fontsize=18)
# ax11.set_xlabel('Date')
# ax11.set_ylabel('EURUSD Closing Returns')

# #Plot validation data results
# ax21 = fig.add_subplot(312)
# ax21.plot(val_data[seq_len:, 3], label='EURUSD Closing Returns')
# ax21.plot(val_pred, color='yellow', linewidth=3, label='Predicted EURUSD Closing Returns')
# ax21.set_title("Validation Data", fontsize=18)
# ax21.set_xlabel('Date')
# ax21.set_ylabel('EURUSD Closing Returns')

#Plot test data results
ax31 = fig.add_subplot(111)
ax31.plot(test_data_fast_ma[seq_len:, 3], label='EURUSD closing mov avg 5')
ax31.plot(test_pred_fast_ma, linewidth=3, label='Predicted EURUSD closing mov avg 5')
ax31.plot(test_data_slow_ma[seq_len:, 3], label='EURUSD closing mov avg 13')
ax31.plot(test_pred_slow_ma, linewidth=3, label='Predicted EURUSD closing mov avg 13')
ax31.plot(test_data_orig[:, 3], label='Original EURUSD Closing Returns')
ax31.set_title("Test Data", fontsize=18)
ax31.set_xlabel('Date')
ax31.set_ylabel('EURUSD Closing Returns')

plt.style.use('seaborn')
plt.tight_layout()
plt.legend(loc='best')
plt.show()

#### train models for backtesting

In [None]:
fast_ma_window = fast_ma_data['ma_window']
filepath = f'../my_stuff/final_{cur_pair}-{timeframe}_Bi-LSTM_{fast_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss', save_best_only=True, verbose=1)

all_x_train_fast_ma, all_y_train_fast_ma = fast_ma_data['all_train_data_np']
fast_ma_model = create_model(seq_len=all_x_train_fast_ma.shape[1], num_features=all_x_train_fast_ma.shape[2])

start_t = time.time()

fast_ma_model.fit(conc8vert_to_tensor(all_x_train_fast_ma), convert_to_tensor(all_y_train_fast_ma),
                  batch_size=fast_ma_data['final_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs)

print(f'training time = {(time.time()-start_t)/60} min')

In [None]:
slow_ma_window = slow_ma_data['ma_window']
filepath = f'../my_stuff/final_{cur_pair}-{timeframe}_Bi-LSTM_{slow_ma_window}-ma_{tenkan_period}-{kijun_period}-{senkou_b_period}-ichi.hdf5'
callback = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='loss', save_best_only=True, verbose=1)

all_x_train_slow_ma, all_y_train_slow_ma = slow_ma_data['all_train_data_np']
slow_ma_model = create_model(seq_len=all_x_train_slow_ma.shape[1], num_features=all_x_train_slow_ma.shape[2])

start_t = time.time()

slow_ma_model.fit(convert_to_tensor(all_x_train_slow_ma), convert_to_tensor(all_y_train_slow_ma),
                  batch_size=slow_ma_data['final_batch_size'],
                  callbacks=[callback],
                  epochs=num_epochs)

print(f'training time = {(time.time()-start_t)/60} min')

# backtest models (xgboost for opening and CNN+Bi-LSTM for closing)

#### global hyperparameters for backtest

In [97]:
# all parameters SHOULD match what the models were trained on for best results (so far this assumption is consistant)

# independant params
min_profit_percent, profit_noise_percent = 0.0016, 0.0016
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
lots_per_trade = 0.2  
starting_balance = 1000
leverage = 500    # 1:leverage
max_concurrent_trades = np.inf # 5
currency_side = 'right'
in_quote_currency = True if currency_side == 'right' else False
pip_resolution = 0.0001
stop_out_pct = 0.2  # explaination: https://www.tradersway.com/new_to_the_market/forex_and_cfd_basics#margin
fast_ma_diff_thresh = 0.01  #.01
# slow_ma_diff_thresh = 0.05   #0.02  
decision_prob_diff_thresh = 0.5   # 0.5 accepts all probabilities
tenkan_period = 9
kijun_period = 30
senkou_b_period = 60
label_non_signals=False
hedged_margin = 50_000
tradersway_commodity = False
cur_pair = 'EURUSD'
timeframe ='H1'

# dependant params (don't edit)
pip_value = contract_size * lots_per_trade * pip_resolution   # in quote currency (right side currency of currency pair)
min_profit = min_profit_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on
profit_noise = profit_noise_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on

indicators_info = {
    'ichimoku': {
        'tenkan_period': tenkan_period,
        'kijun_period': kijun_period,
        'chikou_period': kijun_period,
        'senkou_b_period': senkou_b_period
    },
    'rsi': {
        'periods': 14
    }
}

#### prepare CNN+Bi-LSTM models and preprocessing vars

In [99]:
reload(research)

fast_ma_model = tf.keras.models.load_model('../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5')
# slow_ma_model = tf.keras.models.load_model('../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5')

fast_ma_window = 7
# slow_ma_window = 7
lstm_seq_len = 128

# lstm_decision_predictor = tf.keras.models.load_model('../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side'
#                                                      '_9-30-60-cb-tk-tkp-sen-chi-ichi_cnn-lstm_classifier.hdf5')


tick_data_filepath = research.download_mt5_data(cur_pair, timeframe, global_train_data_range_start, global_train_data_range_end)
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath,
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')
train_data = research.add_ichimoku_features(data_with_indicators)

# train_data_labels = generate_ichimoku_labels(train_data, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
#                                              profit_noise_percent=profit_noise_percent, signals_to_consider=signals_to_consider, 
#                                              contract_size=contract_size, lots_per_trade=lots_per_trade,
#                                              in_quote_currency=in_quote_currency,pip_resolution=pip_resolution)

start_idx, end_idx = research.no_missing_data_idx_range(train_data, early_ending_cols=['chikou_span_visual'])
train_data = train_data.iloc[start_idx:end_idx+1]
train_data = research.dummy_and_remove_features(train_data)
# train_data_labels = train_data_labels.iloc[start_idx:end_idx+1]

ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
                        ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
                        ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength

fast_ma_data = research.get_split_lstm_data(train_data, ma_window=fast_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), fully_divisible_batch_sizes=True,
                                 normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000)
fast_ma_norm_terms = fast_ma_data['all_train_normalization_terms']

# lstm_data = research.get_split_lstm_data(train_data, ma_window=None, seq_len=lstm_seq_len, split_percents=(0,0), fully_divisible_batch_sizes=True,
#                               normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, max_batch_size=2000,
#                               buy_sell_labels_df=train_data_labels['first_decision'], apply_pct_change=True)
# lstm_norm_terms = lstm_data['sub_train_normalization_terms']

print(f'fast MA window: {fast_ma_window}')
# print(f'slow MA window: {slow_ma_window}')
print(f'sequence length for LSTMs: {lstm_seq_len}')

loaded 60306 rows of tick data from C:\GitHub Repos\ForexMachine\ForexMachine\PackageData\TicksData\mt5_EURUSD_h1_ticks_2011-01-01T00;00UTC_to_2020-10-01T00;00UTC.csv
------------------------------------------------------
data w/ moving average window of 7 info:

batch size for evaluation: 0
training data size reduction for evaulation: 0 -> 0
batch size for final training: 1174
training data size reduction for final training: 59876 -> 59874

training data shape: x=(0,), y=(0,)
validation data shape: x=(0,), y=(0,)
test data shape: x=(59876, 128, 30), y=(59876,)
all train data shape: x=(59874, 128, 30), y=(59874,)
------------------------------------------------------
fast MA window: 7
sequence length for LSTMs: 128


#### prepare xgboost models and preprocessing vars

In [100]:
xgb_decision_predictor = xgb.Booster()
xgb_decision_predictor.load_model('../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json')
xgb_labels_dict = {1: 'buy', 0: 'sell'}
open_trade_sigs = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                   'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                   'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                   'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                   'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross

print(f'labels dict for XGB classifier:\n\t{xgb_labels_dict}')
print('signals to consider for opening trades:')
for sig in open_trade_sigs:
    print(f'\t{sig}')

labels dict for XGB classifier:
	{1: 'buy', 0: 'sell'}
signals to consider for opening trades:
	cloud_breakout_bull
	cloud_breakout_bear
	tk_cross_bull_strength
	tk_cross_bear_strength
	tk_price_cross_bull_strength
	tk_price_cross_bear_strength
	senkou_cross_bull_strength
	senkou_cross_bear_strength
	chikou_cross_bull_strength
	chikou_cross_bear_strength


#### prepare data for backtest

In [101]:
tick_data_filepath = research.download_mt5_data("EURUSD", 'H1', '2020-10-02', '2021-01-05')
# tick_data_filepath = research.download_mt5_data("EURUSD", 'H1', '2020-11-02', '2021-01-05')
data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                      indicators_info=indicators_info, 
                                                      datetime_col='datetime')

test_data_with_ichi_sigs = research.add_ichimoku_features(data_with_indicators)
model_data = research.dummy_and_remove_features(test_data_with_ichi_sigs)

start, stop = research.no_missing_data_idx_range(model_data, early_ending_cols=['chikou_span_visual'])

model_data = model_data.iloc[start:stop+1]
model_data_np = model_data.to_numpy()

test_data_with_ichi_sigs = test_data_with_ichi_sigs.iloc[start:stop+1]
test_data_np = test_data_with_ichi_sigs.to_numpy()

ma_cols_set = set([model_data.columns.get_loc(col_name) for col_name in ma_cols])
pc_cols_set = set([model_data.columns.get_loc(col_name) for col_name in pc_cols])

feature_indices = {test_data_with_ichi_sigs.columns[i]: i for i in range(len(test_data_with_ichi_sigs.columns))}

test_data_labels = research.generate_ichimoku_labels(test_data_with_ichi_sigs, label_non_signals=label_non_signals, min_profit_percent=min_profit_percent, 
                                                    profit_noise_percent=profit_noise_percent, signals_to_consider=open_trade_sigs, 
                                                    contract_size=contract_size, lots_per_trade=lots_per_trade,
                                                    in_quote_currency=in_quote_currency, pip_resolution=pip_resolution)

loaded 1547 rows of tick data from C:\GitHub Repos\ForexMachine\ForexMachine\PackageData\TicksData\mt5_EURUSD_h1_ticks_2020-10-02T00;00UTC_to_2021-01-05T00;00UTC.csv


In [102]:
print(model_data.columns, len(model_data.columns))
model_data

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'trend_ichimoku_conv',
       'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b',
       'is_price_above_cb_lines', 'is_price_above_cloud',
       'is_price_inside_cloud', 'is_price_below_cloud', 'cloud_breakout_bull',
       'cloud_breakout_bear', 'tk_cross_bull_strength',
       'tk_cross_bear_strength', 'tk_price_cross_bull_strength',
       'tk_price_cross_bear_strength', 'senkou_cross_bull_strength',
       'senkou_cross_bear_strength', 'chikou_cross_bull_strength',
       'chikou_cross_bear_strength', 'quarter_2', 'quarter_3', 'quarter_4',
       'day_of_week_1', 'day_of_week_2', 'day_of_week_3', 'day_of_week_4'],
      dtype='object') 30


Unnamed: 0,Open,High,Low,Close,Volume,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,is_price_above_cb_lines,...,senkou_cross_bear_strength,chikou_cross_bull_strength,chikou_cross_bear_strength,quarter_2,quarter_3,quarter_4,day_of_week_1,day_of_week_2,day_of_week_3,day_of_week_4
115,1.17545,1.17598,1.17515,1.17583,2075,1.175085,1.175730,1.175407,1.176635,True,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
116,1.17583,1.17595,1.17546,1.17575,1440,1.175085,1.175720,1.175403,1.176635,True,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
117,1.17575,1.17605,1.17537,1.17591,1298,1.175085,1.175720,1.175403,1.176635,True,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
118,1.17591,1.17615,1.17566,1.17582,1131,1.175085,1.175720,1.175403,1.176635,True,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
119,1.17582,1.17602,1.17517,1.17571,938,1.174715,1.175720,1.175218,1.176635,False,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1542,1.22529,1.22565,1.22425,1.22509,3526,1.227575,1.225905,1.226740,1.225945,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1543,1.22509,1.22523,1.22439,1.22521,2837,1.227575,1.225905,1.226740,1.225945,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1544,1.22521,1.22521,1.22414,1.22492,2972,1.227520,1.225905,1.226713,1.225945,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1545,1.22492,1.22523,1.22417,1.22452,1774,1.227520,1.225905,1.226713,1.225945,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### analyze test data to develop trading strategy

In [38]:
# plot move avg cnn+lstm preds vs price data

fast_ma_preds = fast_ma_model.predict(convert_to_tensor(fast_ma_data['all_train_data_np'][0]))
fast_ma_preds = np.reshape(fast_ma_preds,(fast_ma_preds.shape[0],))
fast_ma_preds = fast_ma_preds.tolist()
fill = [None]*(len(test_data_with_ichi_sigs) - len(fast_ma_preds))
fill.extend(fast_ma_preds)
fast_ma_preds = fill

# slow_ma_preds = slow_ma_model.predict(convert_to_tensor(slow_ma_data['all_train_data_np'][0]))
# slow_ma_preds = np.reshape(slow_ma_preds,(slow_ma_preds.shape[0],))
# slow_ma_preds = slow_ma_preds.tolist()
# fill = [None]*(len(test_data_with_ichi_sigs) - len(slow_ma_preds))
# fill.extend(slow_ma_preds)
# slow_ma_preds = fill

lstm_preds = pd.DataFrame({
    'fast_ma':fast_ma_preds, 
#     'slow_ma':slow_ma_preds
})

# import random
# test_data = [random.random() for i in range(len(test_data_with_ichi_sigs))]
# test_data2 = [random.random() for i in range(len(test_data_with_ichi_sigs))]
# test_data = {'testing1': test_data,
#              'testing2': test_data2}
# test_data=pd.DataFrame(test_data)
# show_data_from_range(test_data_with_ichi_sigs, '2020-10-12', '2020-10-16', 
#                      main_indicator='ichimoku', sub_indicators=[test_data,'rsi'], visualize_crosses=True,
#                      visualize_labels=True, labels_df=test_data_labels)

labels = ['first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
          'second_decision','ticks_till_best_profit_second_decision', 'best_profit_second_decision', 'profit_peak_second_decision']
show_data_from_range(test_data_with_ichi_sigs, '2020-11-12', '2020-12-17', 
                     main_indicator='ichimoku', sub_indicators=[lstm_preds], visualize_crosses=True,
                     visualize_labels=True, labels_df=test_data_labels, labels=labels)

#### backtest strat

In [103]:
trades = {}
backtest_trades = {}   # closed trades results
pending_order = None
pending_close = None
decisions_so_far = []
fast_ma_seq_buf = deque()
slow_ma_seq_buf = deque()
fast_ma_window_buf = deque()
slow_ma_window_buf = deque()
fast_ma_avgs = []
slow_ma_avgs = []
fast_ma_perc_chngs = []
slow_ma_perc_chngs = []
xgb_model_perc_chngs = []
fast_ma_preds = []
slow_ma_preds = []
cnn_lstm_pred_times = []
xgb_pred_times = []
free_margins = []
margins = []
margin_levels = []
equities = []
balances = []
open_trades_counts = []
losing_trades_counts = []
pct_done = 0
buffers_rdy_idx = None
balance = starting_balance
equity = starting_balance
free_margin = starting_balance
losing_trades = 0
margin_level = None
margin = None
final_dt = None
stop = False

start_time = time.time()
for i in range(len(test_data_np)):
    """
    fill data buffers for models
    """

    # for xgb model

    if i > 0:
        row = research.apply_perc_change_list(model_data_np[i-1], model_data_np[i], cols_set=pc_cols_set)
        xgb_model_perc_chngs.append(row)

    # for fast MA model

    fast_ma_window_buf.append(model_data_np[i])
    if len(fast_ma_window_buf) > fast_ma_window:
        fast_ma_window_buf.popleft()

    if len(fast_ma_window_buf) == fast_ma_window:
        row = research.apply_moving_avg_q(fast_ma_window_buf, ma_cols_set)
        fast_ma_avgs.append(row)

    if len(fast_ma_avgs) >= 2:
        row = research.apply_perc_change_list(fast_ma_avgs[-2], fast_ma_avgs[-1], pc_cols_set)
        row = research.normalize_data_list(row, fast_ma_norm_terms)
        fast_ma_perc_chngs.append(row) 

    if len(fast_ma_perc_chngs) > 0:
        fast_ma_seq_buf.append(fast_ma_perc_chngs[-1])

    if len(fast_ma_seq_buf) > lstm_seq_len:
        fast_ma_seq_buf.popleft()

    # for slow MA model

#     slow_ma_window_buf.append(model_data_np[i])
#     if len(slow_ma_window_buf) > slow_ma_window:
#         slow_ma_window_buf.popleft()

#     if len(slow_ma_window_buf) == slow_ma_window:
#         row = apply_moving_avg_q(slow_ma_window_buf, ma_cols_set)
#         slow_ma_avgs.append(row)

#     if len(slow_ma_avgs) >= 2:
#         row = apply_perc_change_list(slow_ma_avgs[-2], slow_ma_avgs[-1], pc_cols_set)
#         row = normalize_data_list(row, slow_ma_norm_terms)
#         slow_ma_perc_chngs.append(row)  

#     if len(slow_ma_perc_chngs) > 0:
#         slow_ma_seq_buf.append(slow_ma_perc_chngs[-1])

#     if len(slow_ma_seq_buf) > lstm_seq_len:
#         slow_ma_seq_buf.popleft()

    # now check if LSTMs have enough data to being trade simulation

#     if len(fast_ma_seq_buf) == lstm_seq_len and len(slow_ma_seq_buf) == lstm_seq_len:
    if len(fast_ma_seq_buf) == lstm_seq_len:
        """
        simulate trading
        """

        if buffers_rdy_idx is None:
            buffers_rdy_idx = i
            print('model buffers full, beginning trade sim...')

        # look for ichiomku signals
        causes = []
        for sig in open_trade_sigs:
            sig_i = feature_indices[sig]
            if test_data_np[i][sig_i] != 0:
                causes.append(sig)

        start = time.time()
        fast_ma_pred = fast_ma_model.predict(np.array([fast_ma_seq_buf]))
        slow_ma_pred = [[0]] #slow_ma_model.predict(np.array([slow_ma_seq_buf]))
        duration = time.time() - start
        cnn_lstm_pred_times.append(duration)

        fast_ma_preds.append(fast_ma_pred[0][0])
        slow_ma_preds.append(slow_ma_pred[0][0])

        if len(fast_ma_preds) > 1:
            fast_ma_diff = fast_ma_preds[-1] - fast_ma_preds[-2]    # remember this is the diff in the pct_change of the mov avg
            slow_ma_diff = slow_ma_preds[-1] - slow_ma_preds[-2]
        else:
            fast_ma_diff = 0
            slow_ma_diff = 0

        if pending_order is not None:
            pending_order_i, decision_label, decision_prob, order_causes, sig_fast_ma_diff, sig_slow_ma_diff = pending_order
            open_price = test_data_np[i][feature_indices['Open']]
            decision_prob_diff = abs(decision_label-decision_prob)

            trades[pending_order_i] = {
                'decision_label': decision_label,
                'decision_prob': decision_prob,
                'causes': order_causes,
                'open_price': open_price,
                'trade_open_tick_i': i,
                'profit': None,
                'best_profit': None,
                'ticks_till_close': None,
                'close_idx': None,
                'lots': lots_per_trade,
                'look_to_close': False,
                'forced_close': False,
                'fast_ma_diff_at_sig': sig_fast_ma_diff,
                'slow_ma_diff_at_sig': sig_slow_ma_diff,
                'fast_ma_diff_at_close': None,
                'slow_ma_diff_at_close': None,
                'fast_ma_diff_at_best_sign_to_close': None,
                'slow_ma_diff_at_best_sign_to_close': None
            }

            required_margin = research.get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                                  tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)

            # reference on opening trades and margin level https://www.luckscout.com/leverage-margin-balance-equity-free-margin-and-margin-level-in-forex-trading/
            if required_margin > free_margin or (margin_level is not None and margin_level <= 100) \
                    or len(trades) > max_concurrent_trades or decision_prob_diff > decision_prob_diff_thresh:
                del trades[pending_order_i]
            else:
                margin = required_margin

            pending_order = None

        # update equity and free margin based on currently opened trades
        for trade_i in trades:
            trade = trades[trade_i]
            close_price = test_data_np[i][feature_indices['Close']]
            trade_decision = xgb_labels_dict[trade['decision_label']]

            profit = research.get_profit(close_price, trade['open_price'], pip_value=pip_value, 
                                         pip_resolution=pip_resolution, in_quote_currency=in_quote_currency)
            if trade_decision == 'sell':
                profit *= - 1

            if trade['profit'] is None:
                profit_delta = profit
            else:
                profit_delta = profit - trade['profit']
            trade['profit'] = profit

            if trade['best_profit'] is None or profit > trade['best_profit']:
                trade['best_profit'] = profit
                if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):
                    trade['fast_ma_diff_at_best_sign_to_close'] = fast_ma_diff
                if (slow_ma_diff < 0 and trade_decision == 'buy') or (slow_ma_diff > 0 and trade_decision == 'sell'):
                    trade['slow_ma_diff_at_best_sign_to_close'] = slow_ma_diff

            equity += profit_delta
            free_margin = equity - margin 
            margin_level = equity / margin * 100

            scaled_profit_noise = profit_noise if not in_quote_currency else profit_noise / close_price
            if abs(profit) >= scaled_profit_noise:
                trade['look_to_close'] = True

        # check if equity is <= 0, and if so end the sim
        if equity <= 0:
            stop = True
            print(f'strat failed (i={i}, dt={test_data_np[i][feature_indices["datetime"]]}): no more equity')

        # check if trades should be closed due to stop-out starting with biggest loss if so
        if margin_level is not None and margin_level <= stop_out_pct:
            sorted_keys = sorted(trades, key=lambda trade_i: trades[trade_i]['profit'])
            for j, trade_i in enumerate(sorted_keys):
                balance += trades[trade_i]['profit']

                open_tick_i = trades[trade_i]['trade_open_tick_i']
                trades[trade_i]['ticks_till_close'] = i - open_tick_i
                trades[trade_i]['close_idx'] = i
                trades[trade_i]['forced_close'] = True
                trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                backtest_trades[trade_i] = trades[trade_i]

                del trades[trade_i]

                if j != len(sorted_keys) - 1:
                    margin = research.get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                                 tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)
                    free_margin = equity - margin
                    margin_level = equity / margin * 100                    
                    if margin_level > stop_out_pct:
                        break   

        # find trades to close based on CNN-LSTM preds
        closed_trades = []
        losing_trades = 0
        for trade_i in trades: 
            trade = trades[trade_i]
            trade_decision = xgb_labels_dict[trade['decision_label']]
            
            if trade['look_to_close']:
                if abs(fast_ma_diff) >= fast_ma_diff_thresh:
                    # (MA pct_change is decreasing on a long trade) or (MA pct_change is increasing on a short trade)
                    if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):  
                        closed_trades.append(trade_i)
                        continue    # continue to not count this trade in losing trades if it will be closed anyways
            
            if trade['profit'] < 0:
                losing_trades += 1

        for trade_i in closed_trades:
            balance += trades[trade_i]['profit']

            open_tick_i = trades[trade_i]['trade_open_tick_i']
            trades[trade_i]['ticks_till_close'] = i - open_tick_i
            trades[trade_i]['close_idx'] = i
            trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
            trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
            backtest_trades[trade_i] = trades[trade_i]

            del trades[trade_i]

        if len(trades) == 0:
            margin = None
            margin_level = None

        # generate decision w/ XGB classifier and create pending order
        if len(causes) > 0 and not stop:
            start = time.time()
            model_input = pd.DataFrame([xgb_model_perc_chngs[-1]], columns=model_data.columns)
            model_input = xgb.DMatrix(model_input)
            decision_prob = xgb_decision_predictor.predict(model_input)[0]
            duration = time.time() - start # inlucde converting input in pred time
            xgb_pred_times.append(duration)

            decision_label = np.around(decision_prob)

#             if (decision_label == 1 and fast_ma_diff > 0) or (decision_label == 0 and fast_ma_diff < 0):
#                 pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)
            pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)

        cur_pct_done = int((i-buffers_rdy_idx+1) / (len(test_data_np)-buffers_rdy_idx) * 100)
        if cur_pct_done != pct_done and cur_pct_done % 10 == 0:
            pct_done = cur_pct_done
            print(f'backtest percentage done: {cur_pct_done}%')

    free_margins.append(free_margin)
    equities.append(equity)
    balances.append(balance)
    margins.append(margin)
    margin_levels.append(margin_level)
    open_trades_counts.append(len(trades))
    losing_trades_counts.append(losing_trades)

    final_dt = test_data_np[i][feature_indices["datetime"]]
    if stop:
        break

# print backtest results

backtest_runtime = time.time() - start_time
start_dt = test_data_np[buffers_rdy_idx][feature_indices['datetime']]
end_dt = final_dt

margin_levels_no_none = [ml for ml in margin_levels if ml is not None]
max_margin_level = None if len(margin_levels_no_none) == 0 else max(margin_levels_no_none)
min_margin_level = None if len(margin_levels_no_none) == 0 else min(margin_levels_no_none)
margins_no_none = [m for m in margins if m is not None]
max_margin = None if len(margins_no_none) == 0 else max(margins_no_none)
min_margin =  None if len(margins_no_none) == 0 else min(margins_no_none)

num_won = 0
num_lost = 0
num_won_sells = 0
num_won_buys = 0
num_lost_sells = 0
num_lost_buys = 0
ma_diff_stat_names = ['fast_ma_diff_at_sig', 'slow_ma_diff_at_sig', 'fast_ma_diff_at_close', 'slow_ma_diff_at_close',
                      'fast_ma_diff_at_best_sign_to_close', 'slow_ma_diff_at_best_sign_to_close']
losses_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
wins_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
for trade_i in backtest_trades:
    trade = backtest_trades[trade_i]
    if trade['profit'] > 0:
        if trade['decision_label'] == 1:
            num_won_buys += 1
        else:
            num_won_sells += 1
        num_won += 1

        if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
            wins_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
            wins_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
        if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
            wins_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
            wins_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

        wins_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
        wins_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

        if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
            wins_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
        if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
            wins_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
    else:
        if trade['decision_label'] == 1:
            num_lost_buys += 1
        else:
            num_lost_sells += 1
        num_lost += 1

        if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
            losses_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
            losses_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
        if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
            losses_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
        elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
            losses_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

        losses_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
        losses_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

        if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
            losses_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
        if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
            losses_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
losses_ma_diff_stats = {name: {'arr': np.array(losses_ma_diff_stats[name]['list']), 
                               'agree_arr': np.array(losses_ma_diff_stats[name]['agree_list']), 
                               'oppose_arr': np.array(losses_ma_diff_stats[name]['oppose_list'])} for name in losses_ma_diff_stats}
wins_ma_diff_stats = {name: {'arr': np.array(wins_ma_diff_stats[name]['list']), 
                             'agree_arr': np.array(wins_ma_diff_stats[name]['agree_list']), 
                             'oppose_arr': np.array(wins_ma_diff_stats[name]['oppose_list'])} for name in wins_ma_diff_stats}

print('\n--------------------------------------------------------------------\n')
print('BACKTEST RESULTS:')
print(f'ticks data duration: {(end_dt-start_dt).days} days')
print(f'starting balance: {starting_balance}')
print(f'ending balance: {balance}')
print(f'number of trades won: {num_won}')
print(f'number of trades lost: {num_lost}')
print(f'number of buys: {num_won_buys+num_lost_buys} ({num_won_buys} won, {num_lost_buys} lost)')
print(f'number of sells: {num_won_sells+num_lost_sells} ({num_won_sells} won, {num_lost_sells} lost)')
print(f'balance range: [{min(balances)}, {max(balances)}]')
print(f'equity range: [{min(equities)}, {max(equities)}]')
print(f'free margin range: [{min(free_margins)}, {max(free_margins)}]')
print(f'margins range: [{min_margin}, {max_margin}]')
print(f'margin levels range: [{min_margin_level}, {max_margin_level}]')
print(f'concurrently open trades range: [{min(open_trades_counts)}, {max(open_trades_counts)}]')
print(f'concurrently losing trades range: [{min(losing_trades_counts)}, {max(losing_trades_counts)}]')
print(f'backtest runtime: {backtest_runtime/60} min')

print('\nWON TRADES RESULTS:')
for stat in wins_ma_diff_stats:
    stat_arr = wins_ma_diff_stats[stat]['arr']
    stat_agree_arr = wins_ma_diff_stats[stat]['agree_arr']
    stat_oppose_arr = wins_ma_diff_stats[stat]['oppose_arr']
    if len(stat_arr) > 0:
        print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
              f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
    if len(stat_agree_arr) > 0:
        print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
              f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
    if len(stat_oppose_arr) > 0:
        print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
              f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

print('\nLOST TRADES RESULTS:')
for stat in losses_ma_diff_stats:
    stat_arr = losses_ma_diff_stats[stat]['arr']
    stat_agree_arr = losses_ma_diff_stats[stat]['agree_arr']
    stat_oppose_arr = losses_ma_diff_stats[stat]['oppose_arr']
    if len(stat_arr) > 0:
        print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
              f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
    if len(stat_agree_arr) > 0:
        print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
              f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
    if len(stat_oppose_arr) > 0:
        print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
              f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

print('\nMODELS STATS:')
#         print(f'average pred time of fast & slow MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
print(f'average pred time of fast MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
print(f'average pred time of XGB model: {sum(xgb_pred_times)/len(xgb_pred_times)*1000} ms')

# plot strategy over time vs. price data

backtest_labels_col_names = ['decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred']
backtest_labels = []
for i in range(len(test_data_np)):
    if i in backtest_trades:
        trade = backtest_trades[i]
        trade_decision = xgb_labels_dict[trade['decision_label']]
        backtest_labels.append([trade_decision, trade['ticks_till_close'], trade['profit'], trade['close_idx']])
    else:
        backtest_labels.append([None]*len(backtest_labels_col_names))
backtest_labels = pd.DataFrame(backtest_labels, columns=backtest_labels_col_names)
backtest_labels = pd.concat((test_data_labels, backtest_labels.reset_index(drop=True)), axis=1)    

fill = [None] * buffers_rdy_idx
fill.extend(fast_ma_preds)
fast_ma_preds = fill 
fast_ma_preds.extend([None]*(len(test_data_np) - len(fast_ma_preds)))

fill = [None] * buffers_rdy_idx
fill.extend(slow_ma_preds)
slow_ma_preds = fill 
slow_ma_preds.extend([None]*(len(test_data_np) - len(slow_ma_preds)))

lstm_preds = pd.DataFrame({
    'fast_ma': fast_ma_preds, 
#     'slow_ma': slow_ma_preds
})

balances.extend([None]*(len(test_data_np) - len(balances)))
equities.extend([None]*(len(test_data_np) - len(equities)))
free_margins.extend([None]*(len(test_data_np) - len(free_margins)))
open_trades_counts.extend([None]*(len(test_data_np) - len(open_trades_counts)))
losing_trades_counts.extend([None]*(len(test_data_np) - len(losing_trades_counts)))

strat_data_df = pd.DataFrame({
    'balance': balances,
    'equity': equities,
    'free margin': free_margins
})

open_trades_counts_df = pd.DataFrame({
    'open trades': open_trades_counts,
    'losing trades': losing_trades_counts
})

labels = [#'first_decision','ticks_till_best_profit_first_decision', 'best_profit_first_decision', 'profit_peak_first_decision',
          'decision_pred','ticks_till_best_profit_decision_pred', 'best_profit_decision_pred', 'profit_peak_decision_pred']
show_data_from_range(test_data_with_ichi_sigs, start_dt.isoformat(), end_dt.isoformat(), main_indicator='ichimoku', 
                     sub_indicators=[lstm_preds, strat_data_df, open_trades_counts_df], visualize_crosses=True, visualize_labels=True, 
                     labels_df=backtest_labels, labels=labels)

model buffers full, beginning trade sim...
backtest percentage done: 10%
backtest percentage done: 20%
backtest percentage done: 30%
backtest percentage done: 40%
backtest percentage done: 50%
backtest percentage done: 60%
backtest percentage done: 70%
backtest percentage done: 80%
backtest percentage done: 90%
backtest percentage done: 100%

--------------------------------------------------------------------

BACKTEST RESULTS:
ticks data duration: 80 days
starting balance: 1000
ending balance: 4767.800000000004
number of trades won: 154
number of trades lost: 118
number of buys: 233 (134 won, 99 lost)
number of sells: 39 (20 won, 19 lost)
balance range: [731.6000000000001, 4767.800000000004]
equity range: [852.6000000000022, 4766.6000000000195]
free margin range: [735.4819333333355, 4692.996066666686]
margins range: [23.4434, 414.4994088888889]
margin levels range: [403.5431048341795, 18983.078524627046]
concurrently open trades range: [0, 9]
concurrently losing trades range: [0, 6]


In [57]:
# # for debugging

# fast_ma_perc_chngs = pd.DataFrame(fast_ma_perc_chngs,columns=model_data.columns)
# print(fast_ma_perc_chngs.shape)

# slow_ma_perc_chngs = pd.DataFrame(slow_ma_perc_chngs,columns=model_data.columns)
# print(slow_ma_perc_chngs.shape)  

# x = apply_moving_avg(model_data, ma_cols, fast_ma_window)
# x.dropna(how='any', axis=0, inplace=True)
# x = apply_perc_change(x, pc_cols)
# x.dropna(how='any', axis=0, inplace=True)
# x = normalize_data(x, train_data=False, normalization_terms=fast_ma_norm_terms)[0]
# x_vals = x.to_numpy().astype(np.float32)

# print(x.shape)
# res = np.isclose(x_vals, fast_ma_perc_chngs.to_numpy().astype(np.float32))
# print(res)
# print(np.all(res))

# print()

# x = apply_moving_avg(model_data, ma_cols, slow_ma_window)
# x.dropna(how='any', axis=0, inplace=True)
# x = apply_perc_change(x, pc_cols)
# x.dropna(how='any', axis=0, inplace=True)
# x = normalize_data(x, train_data=False, normalization_terms=slow_ma_norm_terms)[0]
# x_vals = x.to_numpy().astype(np.float32)

# print(x.shape)
# res = np.isclose(x_vals, slow_ma_perc_chngs.to_numpy().astype(np.float32))
# print(res)
# print(np.all(res))

#### tune strat hyperparams with grid search

In [73]:
fast_ma_diff_thresholds = list(np.linspace(0,0.01,num=15))
fast_ma_diff_thresholds.append(0.011)
decision_prob_diff_thresholds = list(np.arange(0.35,0.5,step=0.05))
profit_noise_percents = list(np.linspace(0,0.002,num=15))

param_grid = {
    'ma_models_settings': [
        {
            'fast_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5',
#             'slow_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_13-ma_8-22-44-ichi.hdf5',
            'fast_ma_window': 7,
#             'slow_ma_window': 13
        },
    ],
    'xgb_model_settings': [
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'ichi_settings': (9, 30, 60),
            'currency_side': 'right'
        },
    ],
    'strat_params': [{
        'starting_balance': [1000],
        'leverage': [500],
        'max_concurrent_trades': [np.inf, 10, 5],
        'fast_ma_diff_threshold': fast_ma_diff_thresholds, 
        'decision_prob_diff_thresh': decision_prob_diff_thresholds,
        'profit_noise_percent': profit_noise_percents
    }]
}

param_grid = {
    'ma_models_settings': [
        {
            'fast_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_7-ma_9-30-60-ichi.hdf5',
#             'slow_ma_model_path': '../my_stuff/final_EURUSD-H1_Bi-LSTM_13-ma_8-22-44-ichi.hdf5',
            'fast_ma_window': 7,
#             'slow_ma_window': 13
        },
    ],
    'xgb_model_settings': [
        {
            'model_filepath': '../my_stuff/EURUSD-H1_0.01-min_profit_0.2-lots_right-cur_side_9-30-60-cb-tk-tkp-sen-chi-ichi_xgb_classifier.json',
            'lots_per_trade': 0.2,
            'ichi_settings': (9, 30, 60),
            'currency_side': 'right'
        },
    ],
    'strat_params': [{
        'starting_balance': [1000],
        'leverage': [500],
        'max_concurrent_trades': [np.inf],
        'fast_ma_diff_threshold': [0.01], 
        'decision_prob_diff_thresh': [0.5],
        'profit_noise_percent': [0.0016]
    }]
}

lstm_seq_len = 128
xgb_labels_dict = {1: 'buy', 0: 'sell'}
contract_size = 100_000   # size of 1 lot is typically 100,000 (100 for gold, becuase 1 lot = 100 oz of gold)
pip_resolution = 0.0001
stop_out_pct = 0.2  # explaination: https://www.tradersway.com/new_to_the_market/forex_and_cfd_basics#margin
label_non_signals=False
cur_pair = 'EURUSD'
timeframe = 'H1'
hedged_margin = 50_000
tradersway_commodity = False

open_trade_sigs = ['cloud_breakout_bull','cloud_breakout_bear',                       # cloud breakout
                   'tk_cross_bull_strength', 'tk_cross_bear_strength',                # Tenkan Sen / Kijun Sen Cross
                   'tk_price_cross_bull_strength', 'tk_price_cross_bear_strength',    # price crossing both the Tenkan Sen / Kijun Sen
                   'senkou_cross_bull_strength', 'senkou_cross_bear_strength',        # Senkou Span Cross
                   'chikou_cross_bull_strength', 'chikou_cross_bear_strength']        # Chikou Span Cross
ma_cols = ['Open','High','Low','Close','Volume']
pc_cols = ['Open','High','Low','Close','Volume',
           'trend_ichimoku_base','trend_ichimoku_conv',
           'trend_ichimoku_a', 'trend_ichimoku_b']
normalization_groups = [['Open','High','Low','Close'],  # prices
                        ['trend_ichimoku_base','trend_ichimoku_conv'],  # ichi conv & base lines
                        ['trend_ichimoku_a', 'trend_ichimoku_b'], # ichi cloud lines
                        ['tk_cross_bull_strength','tk_cross_bear_strength',   # tk cross strength
                        'tk_price_cross_bull_strength','tk_price_cross_bear_strength',   # tk price cross strength
                        'senkou_cross_bull_strength','senkou_cross_bear_strength',   # semkou cross strength
                        'chikou_cross_bull_strength','chikou_cross_bear_strength']]   # chikou cross strength


param_grid = ParameterGrid(param_grid)
param_grid = random.sample(list(param_grid), len(param_grid))

strat_params_len = len(ParameterGrid(param_grid[0]['strat_params']))
total_params = len(param_grid) * strat_params_len
print(f'total number of parameters to test in grid search: {total_params}')

total number of parameters to test in grid search: 1


In [74]:
# start_dt_str, end_dt_str = '2020-10-02', '2021-01-05'
start_dt_str, end_dt_str = '2020-11-02', '2021-01-05'
grid_search_save_path = f'../my_stuff/{cur_pair}-{timeframe}_{start_dt_str}-to-{end_dt_str}_backtest_grid_search_results.csv'
tick_data_filepath = research.download_mt5_data(cur_pair, timeframe, start_dt_str, end_dt_str) # (cur_pair, timeframe, '2020-10-02', '2021-01-05')
best_strat_results = None
best_strat_score = None
backtest_results = []

grid_search_start_time = time.time()
for params_i, params in enumerate(param_grid):
    s1 = time.time()
    ma_models_settings = params['ma_models_settings']
    xgb_model_settings = params['xgb_model_settings']
    
    strat_params = params['strat_params']
    strat_params = ParameterGrid(strat_params)
    strat_params = random.sample(list(strat_params), len(strat_params))
    
    fast_ma_model_path = ma_models_settings['fast_ma_model_path']
#     slow_ma_model_path = ma_models_settings['slow_ma_model_path']
    fast_ma_window = ma_models_settings['fast_ma_window']
#     slow_ma_window = ma_models_settings['slow_ma_window']
    
    fast_ma_model = tf.keras.models.load_model(fast_ma_model_path)
#     slow_ma_model = tf.keras.models.load_model(slow_ma_model_path)
    
    xgb_model_path = xgb_model_settings['model_filepath']
    lots_per_trade = xgb_model_settings['lots_per_trade']
    tenkan_period, kijun_period, senkou_b_period = xgb_model_settings['ichi_settings']
    currency_side = xgb_model_settings['currency_side']
    in_quote_currency = True if currency_side == 'right' else False
    
    xgb_decision_predictor = xgb.Booster()
    xgb_decision_predictor.load_model(xgb_model_path)
    
    indicators_info = {
        'ichimoku': {
            'tenkan_period': tenkan_period,
            'kijun_period': kijun_period,
            'chikou_period': kijun_period,
            'senkou_b_period': senkou_b_period
        },
        'rsi': {
            'periods': 14
        }
    }
    
    data_with_indicators = research.add_indicators_to_raw(filepath=tick_data_filepath, 
                                                          indicators_info=indicators_info, 
                                                          datetime_col='datetime')
    test_data_with_ichi_sigs = research.add_ichimoku_features(data_with_indicators)
    model_data = research.dummy_and_remove_features(test_data_with_ichi_sigs)
    
    start, stop = research.no_missing_data_idx_range(model_data)

    model_data = model_data.iloc[start:stop+1]
    model_data_np = model_data.to_numpy()

    test_data_with_ichi_sigs = test_data_with_ichi_sigs.iloc[start:stop+1]
    test_data_np = test_data_with_ichi_sigs.to_numpy()

    ma_cols_set = set([model_data.columns.get_loc(col_name) for col_name in ma_cols])
    pc_cols_set = set([model_data.columns.get_loc(col_name) for col_name in pc_cols])

    feature_indices = {test_data_with_ichi_sigs.columns[i]: i for i in range(len(test_data_with_ichi_sigs.columns))}
    
    for params_i_2, params_2 in enumerate(strat_params):
        s2 = time.time()
        starting_balance = params_2['starting_balance']
        leverage = params_2['leverage']    # 1:leverage
        max_concurrent_trades = params_2['max_concurrent_trades']
        fast_ma_diff_thresh = params_2['fast_ma_diff_threshold']
        decision_prob_diff_thresh = params_2['decision_prob_diff_thresh']   # 0.5 accepts all probabilities
        profit_noise_percent = params_2['profit_noise_percent']
        
        pip_value = contract_size * lots_per_trade * pip_resolution   # in quote currency (right side currency of currency pair)
        profit_noise = profit_noise_percent * lots_per_trade * contract_size   # in base currecy because thats what models were traied on
        
        fast_ma_data = research.get_split_lstm_data(model_data, ma_window=fast_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
                                                    normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
                                                    max_batch_size=2000, just_train=True, print_info=False)
#         slow_ma_data = research.get_split_lstm_data(model_data, ma_window=slow_ma_window, seq_len=lstm_seq_len, split_percents=(0,0), 
#                                           normalization_groups=normalization_groups, pc_cols=pc_cols, ma_cols=ma_cols, min_batch_size=1000, 
#                                           max_batch_size=2000, just_train=True, print_info=False)
        
        trades = {}
        backtest_trades = {}   # closed trades results
        pending_order = None
        pending_close = None
        decisions_so_far = []
        fast_ma_seq_buf = deque()
        slow_ma_seq_buf = deque()
        fast_ma_window_buf = deque()
        slow_ma_window_buf = deque()
        fast_ma_avgs = []
        slow_ma_avgs = []
        fast_ma_perc_chngs = []
        slow_ma_perc_chngs = []
        xgb_model_perc_chngs = []
        fast_ma_preds = []
        slow_ma_preds = []
        cnn_lstm_pred_times = []
        xgb_pred_times = []
        free_margins = []
        margins = []
        margin_levels = []
        equities = []
        balances = []
        open_trades_counts = []
        losing_trades_counts = []
        pct_done = 0
        buffers_rdy_idx = None
        balance = starting_balance
        equity = starting_balance
        free_margin = starting_balance
        losing_trades = 0
        margin_level = None
        margin = None
        final_dt = None
        stop = False

        start_time = time.time()
        for i in range(len(test_data_np)):
            """
            fill data buffers for models
            """

            # for xgb model

            if i > 0:
                row = research.apply_perc_change_list(model_data_np[i-1], model_data_np[i], cols_set=pc_cols_set)
                xgb_model_perc_chngs.append(row)

            # for fast MA model

            fast_ma_window_buf.append(model_data_np[i])
            if len(fast_ma_window_buf) > fast_ma_window:
                fast_ma_window_buf.popleft()

            if len(fast_ma_window_buf) == fast_ma_window:
                row = research.apply_moving_avg_q(fast_ma_window_buf, ma_cols_set)
                fast_ma_avgs.append(row)

            if len(fast_ma_avgs) >= 2:
                row = research.apply_perc_change_list(fast_ma_avgs[-2], fast_ma_avgs[-1], pc_cols_set)
                row = research.normalize_data_list(row, fast_ma_norm_terms)
                fast_ma_perc_chngs.append(row) 

            if len(fast_ma_perc_chngs) > 0:
                fast_ma_seq_buf.append(fast_ma_perc_chngs[-1])

            if len(fast_ma_seq_buf) > lstm_seq_len:
                fast_ma_seq_buf.popleft()

            # for slow MA model

        #     slow_ma_window_buf.append(model_data_np[i])
        #     if len(slow_ma_window_buf) > slow_ma_window:
        #         slow_ma_window_buf.popleft()

        #     if len(slow_ma_window_buf) == slow_ma_window:
        #         row = apply_moving_avg_q(slow_ma_window_buf, ma_cols_set)
        #         slow_ma_avgs.append(row)

        #     if len(slow_ma_avgs) >= 2:
        #         row = apply_perc_change_list(slow_ma_avgs[-2], slow_ma_avgs[-1], pc_cols_set)
        #         row = normalize_data_list(row, slow_ma_norm_terms)
        #         slow_ma_perc_chngs.append(row)  

        #     if len(slow_ma_perc_chngs) > 0:
        #         slow_ma_seq_buf.append(slow_ma_perc_chngs[-1])

        #     if len(slow_ma_seq_buf) > lstm_seq_len:
        #         slow_ma_seq_buf.popleft()

            # now check if LSTMs have enough data to being trade simulation

        #     if len(fast_ma_seq_buf) == lstm_seq_len and len(slow_ma_seq_buf) == lstm_seq_len:
            if len(fast_ma_seq_buf) == lstm_seq_len:
                """
                simulate trading
                """

                if buffers_rdy_idx is None:
                    buffers_rdy_idx = i
                    print('model buffers full, beginning trade sim...')

                # look for ichiomku signals
                causes = []
                for sig in open_trade_sigs:
                    sig_i = feature_indices[sig]
                    if test_data_np[i][sig_i] != 0:
                        causes.append(sig)

                start = time.time()
                fast_ma_pred = fast_ma_model.predict(np.array([fast_ma_seq_buf]))
                slow_ma_pred = [[0]] #slow_ma_model.predict(np.array([slow_ma_seq_buf]))
                duration = time.time() - start
                cnn_lstm_pred_times.append(duration)

                fast_ma_preds.append(fast_ma_pred[0][0])
                slow_ma_preds.append(slow_ma_pred[0][0])

                if len(fast_ma_preds) > 1:
                    fast_ma_diff = fast_ma_preds[-1] - fast_ma_preds[-2]    # remember this is the diff in the pct_change of the mov avg
                    slow_ma_diff = slow_ma_preds[-1] - slow_ma_preds[-2]
                else:
                    fast_ma_diff = 0
                    slow_ma_diff = 0

                if pending_order is not None:
                    pending_order_i, decision_label, decision_prob, order_causes, sig_fast_ma_diff, sig_slow_ma_diff = pending_order
                    open_price = test_data_np[i][feature_indices['Open']]
                    decision_prob_diff = abs(decision_label-decision_prob)

                    trades[pending_order_i] = {
                        'decision_label': decision_label,
                        'decision_prob': decision_prob,
                        'causes': order_causes,
                        'open_price': open_price,
                        'trade_open_tick_i': i,
                        'profit': None,
                        'best_profit': None,
                        'ticks_till_close': None,
                        'close_idx': None,
                        'lots': lots_per_trade,
                        'look_to_close': False,
                        'forced_close': False,
                        'fast_ma_diff_at_sig': sig_fast_ma_diff,
                        'slow_ma_diff_at_sig': sig_slow_ma_diff,
                        'fast_ma_diff_at_close': None,
                        'slow_ma_diff_at_close': None,
                        'fast_ma_diff_at_best_sign_to_close': None,
                        'slow_ma_diff_at_best_sign_to_close': None
                    }

                    required_margin = research.get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                                          tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)

                    # reference on opening trades and margin level https://www.luckscout.com/leverage-margin-balance-equity-free-margin-and-margin-level-in-forex-trading/
                    if required_margin > free_margin or (margin_level is not None and margin_level <= 100) \
                            or len(trades) > max_concurrent_trades or decision_prob_diff > decision_prob_diff_thresh:
                        del trades[pending_order_i]
                    else:
                        margin = required_margin

                    pending_order = None

                # update equity and free margin based on currently opened trades
                for trade_i in trades:
                    trade = trades[trade_i]
                    close_price = test_data_np[i][feature_indices['Close']]
                    trade_decision = xgb_labels_dict[trade['decision_label']]

                    profit = research.get_profit(close_price, trade['open_price'], pip_value=pip_value, 
                                                 pip_resolution=pip_resolution, in_quote_currency=in_quote_currency)
                    if trade_decision == 'sell':
                        profit *= - 1

                    if trade['profit'] is None:
                        profit_delta = profit
                    else:
                        profit_delta = profit - trade['profit']
                    trade['profit'] = profit

                    if trade['best_profit'] is None or profit > trade['best_profit']:
                        trade['best_profit'] = profit
                        if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):
                            trade['fast_ma_diff_at_best_sign_to_close'] = fast_ma_diff
                        if (slow_ma_diff < 0 and trade_decision == 'buy') or (slow_ma_diff > 0 and trade_decision == 'sell'):
                            trade['slow_ma_diff_at_best_sign_to_close'] = slow_ma_diff

                    equity += profit_delta
                    free_margin = equity - margin 
                    margin_level = equity / margin * 100

                    scaled_profit_noise = profit_noise if not in_quote_currency else profit_noise / close_price
                    if abs(profit) >= scaled_profit_noise:
                        trade['look_to_close'] = True

                # check if equity is <= 0, and if so end the sim
                if equity <= 0:
                    stop = True
                    print(f'strat failed (i={i}, dt={test_data_np[i][feature_indices["datetime"]]}): no more equity')

                # check if trades should be closed due to stop-out starting with biggest loss if so
                if margin_level is not None and margin_level <= stop_out_pct:
                    sorted_keys = sorted(trades, key=lambda trade_i: trades[trade_i]['profit'])
                    for j, trade_i in enumerate(sorted_keys):
                        balance += trades[trade_i]['profit']

                        open_tick_i = trades[trade_i]['trade_open_tick_i']
                        trades[trade_i]['ticks_till_close'] = i - open_tick_i
                        trades[trade_i]['close_idx'] = i
                        trades[trade_i]['forced_close'] = True
                        trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                        trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                        backtest_trades[trade_i] = trades[trade_i]

                        del trades[trade_i]

                        if j != len(sorted_keys) - 1:
                            margin = research.get_margin(trades, buy_label=1, sell_label=0, contract_size=contract_size, leverage=leverage, 
                                                         tradersway_commodity=tradersway_commodity, in_quote_currency=in_quote_currency, hedged_margin=hedged_margin)
                            free_margin = equity - margin
                            margin_level = equity / margin * 100                    
                            if margin_level > stop_out_pct:
                                break   

                # find trades to close based on CNN-LSTM preds
                closed_trades = []
                losing_trades = 0
                for trade_i in trades: 
                    trade = trades[trade_i]
                    trade_decision = xgb_labels_dict[trade['decision_label']]

                    if trade['look_to_close']:
                        if abs(fast_ma_diff) >= fast_ma_diff_thresh:
                            # (MA pct_change is decreasing on a long trade) or (MA pct_change is increasing on a short trade)
                            if (fast_ma_diff < 0 and trade_decision == 'buy') or (fast_ma_diff > 0 and trade_decision == 'sell'):  
                                closed_trades.append(trade_i)
                                continue    # continue to not count this trade in losing trades if it will be closed anyways

                    if trade['profit'] < 0:
                        losing_trades += 1

                for trade_i in closed_trades:
                    balance += trades[trade_i]['profit']

                    open_tick_i = trades[trade_i]['trade_open_tick_i']
                    trades[trade_i]['ticks_till_close'] = i - open_tick_i
                    trades[trade_i]['close_idx'] = i
                    trades[trade_i]['fast_ma_diff_at_close'] = fast_ma_diff
                    trades[trade_i]['slow_ma_diff_at_close'] = slow_ma_diff
                    backtest_trades[trade_i] = trades[trade_i]

                    del trades[trade_i]

                if len(trades) == 0:
                    margin = None
                    margin_level = None

                # generate decision w/ XGB classifier and create pending order
                if len(causes) > 0 and not stop:
                    start = time.time()
                    model_input = pd.DataFrame([xgb_model_perc_chngs[-1]], columns=model_data.columns)
                    model_input = xgb.DMatrix(model_input)
                    decision_prob = xgb_decision_predictor.predict(model_input)[0]
                    duration = time.time() - start # inlucde converting input in pred time
                    xgb_pred_times.append(duration)

                    decision_label = np.around(decision_prob)

        #             if (decision_label == 1 and fast_ma_diff > 0) or (decision_label == 0 and fast_ma_diff < 0):
        #                 pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)
                    pending_order = (i, decision_label, decision_prob, causes, fast_ma_diff, slow_ma_diff)

                cur_pct_done = int((i-buffers_rdy_idx+1) / (len(test_data_np)-buffers_rdy_idx) * 100)
                if cur_pct_done != pct_done and cur_pct_done % 10 == 0:
                    pct_done = cur_pct_done
                    print(f'backtest percentage done: {cur_pct_done}%')

            free_margins.append(free_margin)
            equities.append(equity)
            balances.append(balance)
            margins.append(margin)
            margin_levels.append(margin_level)
            open_trades_counts.append(len(trades))
            losing_trades_counts.append(losing_trades)

            final_dt = test_data_np[i][feature_indices["datetime"]]
            if stop:
                break

        # print backtest results

        backtest_runtime = time.time() - start_time
        start_dt = test_data_np[buffers_rdy_idx][feature_indices['datetime']]
        end_dt = final_dt

        margin_levels_no_none = [ml for ml in margin_levels if ml is not None]
        max_margin_level = None if len(margin_levels_no_none) == 0 else max(margin_levels_no_none)
        min_margin_level = None if len(margin_levels_no_none) == 0 else min(margin_levels_no_none)
        margins_no_none = [m for m in margins if m is not None]
        max_margin = None if len(margins_no_none) == 0 else max(margins_no_none)
        min_margin =  None if len(margins_no_none) == 0 else min(margins_no_none)

        num_won = 0
        num_lost = 0
        num_won_sells = 0
        num_won_buys = 0
        num_lost_sells = 0
        num_lost_buys = 0
        ma_diff_stat_names = ['fast_ma_diff_at_sig', 'slow_ma_diff_at_sig', 'fast_ma_diff_at_close', 'slow_ma_diff_at_close',
                              'fast_ma_diff_at_best_sign_to_close', 'slow_ma_diff_at_best_sign_to_close']
        losses_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
        wins_ma_diff_stats = {name: {'list': [], 'agree_list':[], 'oppose_list':[]} for name in ma_diff_stat_names}
        for trade_i in backtest_trades:
            trade = backtest_trades[trade_i]
            if trade['profit'] > 0:
                if trade['decision_label'] == 1:
                    num_won_buys += 1
                else:
                    num_won_sells += 1
                num_won += 1

                if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
                    wins_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
                elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
                    wins_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
                if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
                    wins_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
                elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
                    wins_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

                wins_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
                wins_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

                if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
                    wins_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
                if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
                    wins_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
            else:
                if trade['decision_label'] == 1:
                    num_lost_buys += 1
                else:
                    num_lost_sells += 1
                num_lost += 1

                if (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] < 0):
                    losses_ma_diff_stats['fast_ma_diff_at_sig']['agree_list'].append(abs(trade['fast_ma_diff_at_sig']))
                elif (trade['decision_label'] == 1 and trade['fast_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['fast_ma_diff_at_sig'] > 0):
                    losses_ma_diff_stats['fast_ma_diff_at_sig']['oppose_list'].append(abs(trade['fast_ma_diff_at_sig']))
                if (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] > 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] < 0):
                    losses_ma_diff_stats['slow_ma_diff_at_sig']['agree_list'].append(abs(trade['slow_ma_diff_at_sig']))
                elif (trade['decision_label'] == 1 and trade['slow_ma_diff_at_sig'] < 0) or (trade['decision_label'] == 0 and trade['slow_ma_diff_at_sig'] > 0):
                    losses_ma_diff_stats['slow_ma_diff_at_sig']['oppose_list'].append(abs(trade['slow_ma_diff_at_sig']))

                losses_ma_diff_stats['fast_ma_diff_at_close']['list'].append(abs(trade['fast_ma_diff_at_close']))
                losses_ma_diff_stats['slow_ma_diff_at_close']['list'].append(abs(trade['slow_ma_diff_at_close']))

                if trade['fast_ma_diff_at_best_sign_to_close'] is not None:
                    losses_ma_diff_stats['fast_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['fast_ma_diff_at_best_sign_to_close']))
                if trade['slow_ma_diff_at_best_sign_to_close'] is not None:
                    losses_ma_diff_stats['slow_ma_diff_at_best_sign_to_close']['list'].append(abs(trade['slow_ma_diff_at_best_sign_to_close']))
        losses_ma_diff_stats = {name: {'arr': np.array(losses_ma_diff_stats[name]['list']), 
                                       'agree_arr': np.array(losses_ma_diff_stats[name]['agree_list']), 
                                       'oppose_arr': np.array(losses_ma_diff_stats[name]['oppose_list'])} for name in losses_ma_diff_stats}
        wins_ma_diff_stats = {name: {'arr': np.array(wins_ma_diff_stats[name]['list']), 
                                     'agree_arr': np.array(wins_ma_diff_stats[name]['agree_list']), 
                                     'oppose_arr': np.array(wins_ma_diff_stats[name]['oppose_list'])} for name in wins_ma_diff_stats}

        print('\n--------------------------------------------------------------------\n')
        print('BACKTEST RESULTS:')
        print(f'ticks data duration: {(end_dt-start_dt).days} days')
        print(f'starting balance: {starting_balance}')
        print(f'ending balance: {balance}')
        print(f'number of trades won: {num_won}')
        print(f'number of trades lost: {num_lost}')
        print(f'number of buys: {num_won_buys+num_lost_buys} ({num_won_buys} won, {num_lost_buys} lost)')
        print(f'number of sells: {num_won_sells+num_lost_sells} ({num_won_sells} won, {num_lost_sells} lost)')
        print(f'balance range: [{min(balances)}, {max(balances)}]')
        print(f'equity range: [{min(equities)}, {max(equities)}]')
        print(f'free margin range: [{min(free_margins)}, {max(free_margins)}]')
        print(f'margins range: [{min_margin}, {max_margin}]')
        print(f'margin levels range: [{min_margin_level}, {max_margin_level}]')
        print(f'concurrently open trades range: [{min(open_trades_counts)}, {max(open_trades_counts)}]')
        print(f'concurrently losing trades range: [{min(losing_trades_counts)}, {max(losing_trades_counts)}]')
        print(f'backtest runtime: {backtest_runtime/60} min')

        print('\nWON TRADES RESULTS:')
        for stat in wins_ma_diff_stats:
            stat_arr = wins_ma_diff_stats[stat]['arr']
            stat_agree_arr = wins_ma_diff_stats[stat]['agree_arr']
            stat_oppose_arr = wins_ma_diff_stats[stat]['oppose_arr']
            if len(stat_arr) > 0:
                print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
                      f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
            if len(stat_agree_arr) > 0:
                print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
                      f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
            if len(stat_oppose_arr) > 0:
                print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
                      f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

        print('\nLOST TRADES RESULTS:')
        for stat in losses_ma_diff_stats:
            stat_arr = losses_ma_diff_stats[stat]['arr']
            stat_agree_arr = losses_ma_diff_stats[stat]['agree_arr']
            stat_oppose_arr = losses_ma_diff_stats[stat]['oppose_arr']
            if len(stat_arr) > 0:
                print(f'{stat}: count={len(stat_arr)}, min={np.amin(stat_arr)}, max={np.amax(stat_arr)},'
                      f' mean={np.mean(stat_arr)}, median={np.median(stat_arr)}')
            if len(stat_agree_arr) > 0:
                print(f'{stat} that aggreed: count={len(stat_agree_arr)}, min={np.amin(stat_agree_arr)}, max={np.amax(stat_agree_arr)},'
                      f' mean={np.mean(stat_agree_arr)}, median={np.median(stat_agree_arr)}')
            if len(stat_oppose_arr) > 0:
                print(f'{stat} that opposed: count={len(stat_oppose_arr)}, min={np.amin(stat_oppose_arr)}, max={np.amax(stat_oppose_arr)},'
                      f' mean={np.mean(stat_oppose_arr)}, median={np.median(stat_oppose_arr)}')

        print('\nMODELS STATS:')
        #         print(f'average pred time of fast & slow MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
        print(f'average pred time of fast MA CNN+LSTM models: {sum(cnn_lstm_pred_times)/len(cnn_lstm_pred_times)*1000} ms')
        print(f'average pred time of XGB model: {sum(xgb_pred_times)/len(xgb_pred_times)*1000} ms')
        
        results = {
            'tenkan_period': tenkan_period,
            'kijun_period': kijun_period,
            'chikou_period': kijun_period,
            'senkou_b_period': senkou_b_period,
            'fast_ma_model_path': fast_ma_model_path,
#             'slow_ma_model_path': slow_ma_model_path,
            'fast_ma_window': fast_ma_window,
#             'slow_ma_window': slow_ma_window,
            'xgb_model_path': xgb_model_path,
            'lots_per_trade': lots_per_trade,
            'profit_noise_percent': profit_noise_percent,
            'stop_out_pct': stop_out_pct,
            'starting_balance': starting_balance,
            'leverage': leverage,
            'max_concurrent_trades': max_concurrent_trades,
            'currency_side': currency_side,
            'fast_ma_diff_thresh': fast_ma_diff_thresh,
#             'slow_ma_diff_thresh': slow_ma_diff_thresh,
            'decision_prob_diff_thresh': decision_prob_diff_thresh,
            'ending_balance': balance,
            'max_balance': max(balances),
            'min_balance': min(balances),
            'max_equity': max(equities),
            'min_equity': min(equities),
            'max_free_margin': max(free_margins),
            'min_free_margin': min(free_margins),
            'max_margin': max_margin,
            'min_margin': min_margin,
            'max_margin_level': max_margin_level,
            'min_margin_level': min_margin_level,
            'max_concurrently_open_trades': max(open_trades_counts),
            'min_concurrently_open_trades': min(open_trades_counts),
            'num_won_trades': num_won,
            'num_lost_trades': num_lost,
            'num_buys': num_won_buys+num_lost_buys,
            'num_won_buys': num_won_buys,
            'num_lost_buys': num_lost_buys,
            'num_sells': num_won_sells+num_lost_sells,
            'num_won_sells': num_won_sells,
            'num_lost_sells': num_lost_sells
        }
        
        strat_score = balance
        if best_strat_results is None or best_strat_score < strat_score:
            best_strat_results = results
            best_strat_score = strat_score
            
        backtest_results.append(results)
        
        print('\n--------------------------------------------------------------------------------')
        print(f'{params_i_2+1}/{len(strat_params)} strat params tested, runtime of last params: {(time.time()-s2)/60} min')
        print('--------------------------------------------------------------------------------\n')
        print(f'last backtest results:')
        print(f'{results}\n')
        print(f'best backtest results:')
        print(f'{best_strat_results}\n')
        
        if params_i_2 % 100 == 0:
            backtest_results_sorted = sorted(backtest_results, key=lambda d: d['ending_balance'], reverse=True)
            backtest_results_sorted_df = pd.DataFrame(backtest_results_sorted)
            backtest_results_sorted_df.to_csv(grid_search_save_path)
        
    print('\n--------------------------------------------------------------------------------')
    print(f'{params_i+1}/{len(param_grid)} model combos tested, runtime of last combo: {(time.time()-s1)/60} min')
    print('--------------------------------------------------------------------------------\n')

    backtest_results_sorted = sorted(backtest_results, key=lambda d: d['ending_balance'], reverse=True)
    backtest_results_sorted_df = pd.DataFrame(backtest_results_sorted)
    backtest_results_sorted_df.to_csv(grid_search_save_path)

print(f'grid search runtime: {(time.time()-grid_search_start_time)/60} min')

backtest_results_sorted = sorted(backtest_results, key=lambda d: d['ending_balance'], reverse=True)
backtest_results_sorted_df = pd.DataFrame(backtest_results_sorted)
backtest_results_sorted_df.to_csv(grid_search_save_path)

loaded 1048 rows of tick data from C:\GitHub Repos\ForexMachine\ForexMachine\PackageData\TicksData\mt5_EURUSD_h1_ticks_2020-11-02T00;00UTC_to_2021-01-05T00;00UTC.csv
model buffers full, beginning trade sim...
backtest percentage done: 10%
backtest percentage done: 20%
backtest percentage done: 30%
backtest percentage done: 40%
backtest percentage done: 50%
backtest percentage done: 60%
backtest percentage done: 70%
backtest percentage done: 80%
backtest percentage done: 90%
backtest percentage done: 100%

--------------------------------------------------------------------

BACKTEST RESULTS:
ticks data duration: 47 days
starting balance: 1000
ending balance: 3214.3999999999983
number of trades won: 107
number of trades lost: 73
number of buys: 157 (95 won, 62 lost)
number of sells: 23 (12 won, 11 lost)
balance range: [507.20000000001335, 3214.3999999999983]
equity range: [507.20000000001335, 3213.1999999999994]
free margin range: [349.6160380952551, 3139.596066666666]
margins range: [2

# notes on things to do

In [None]:
"""
if self.data[i][self.feature_indices['datetime']].strftime('%Y-%m-%dT%H:%M') == '2013-05-28T10:00':
    print('yo')
"""

"""
To-do:

1) tune hyperparams for backtest of xgboost for opening and CNN+Bi-LSTM for closing strat
"""