## Generate charts

In [1]:
# required packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplfinance as mpf
from PIL import Image
import os
import tqdm
from datetime import datetime

In [2]:
def generate_training_data(curr_pair, candle_hist = 40):
    """Render labelled candlestick chart images for one currency pair.

    Reads the saved price and signal parquet files for ``curr_pair``, joins
    them on their index, derives mid prices and EMA indicators, and then saves
    one chart image per rolling window under ``<pair folder>/train_imgs/``.
    Each file is named ``{pair}_{window start}_{window end}_{signal}.jpg``,
    where the signal is the one found on the window's last row.

    Runs are incremental: when chart files for this pair already exist, only
    windows that start after the latest existing window start are rendered.

    Parameters
    ----------
    curr_pair : str
        Pair written as 'AAA/BBB' (e.g. 'EUR/USD'). ``curr_pair[:3]`` and
        ``curr_pair[4:]`` are used to build folder and file names.
    candle_hist : int, default 40
        Row offset between the first and the last candle of a window. Since
        label slicing with ``.loc`` is inclusive, each chart shows
        ``candle_hist + 1`` rows (assuming unique timestamps in the index).

    Returns
    -------
    None
        The function prints a summary and writes image files as side effects.
    """
    base, quote = curr_pair[:3], curr_pair[4:]
    pair_tag    = f'{base}_{quote}'
    folder_path = f'./data/fx_data/{pair_tag}/'
    img_dir     = folder_path + 'train_imgs/'
    time_fmt    = '%Y-%m-%d_%H-%M'

    # extract saved signal and price data | merge on the (date) index
    fx_signal_data = pd.read_parquet(folder_path + f'fx_data_{pair_tag}_w_sig.parquet')
    fx_data        = pd.read_parquet(folder_path + f'fx_data_{pair_tag}.parquet')
    fx_data        = fx_data.set_index('date')
    fx_final = fx_data.merge(right = fx_signal_data
                             , left_index=True
                             , right_index=True
                             , how = 'inner')

    # print status
    print('--------------------------\n'\
          f'Curr. Pair: {base}-{quote}\n\n' \
          f'Start-time: {fx_final.index.min()},\nEnd-time: {fx_final.index.max()},\n\n' \
          f'Signal-Breakdown\n{pd.DataFrame(fx_final.signal.value_counts())}\n\n' \
          f'Dataset-size:{fx_final.shape}\n'\
           '--------------------------')

    # collapse bid/ask quotes into a single mid price per OHLC field
    for field in ('open', 'close', 'high', 'low'):
        fx_final[field] = (fx_final[f'bid{field}'] + fx_final[f'ask{field}']) / 2
    fx_final = fx_final.sort_index()

    # keep only the required metrics; mplfinance expects the column name 'volume'
    fx_final = (fx_final.loc[:, ['open', 'close', 'high', 'low', 'tickqty', 'signal', 'signal_count']]
                        .rename(columns={'tickqty': 'volume'}))

    # basic moving-average indicators: 50-span EMA of close/high/low, 200-span EMA of close
    ema_specs = {'ewm_50_m': ('close', 50),
                 'ewm_50_h': ('high', 50),
                 'ewm_50_l': ('low', 50),
                 'ewm_200':  ('close', 200)}
    for ema_col, (price_col, span) in ema_specs.items():
        fx_final[ema_col] = fx_final[price_col].ewm(span=span
                                                    , min_periods=0
                                                    , adjust=False
                                                    , ignore_na=False).mean()

    # row offset between the first and last candle of each chart
    look_back_time = candle_hist

    # create the image folder once, up front (previously re-checked on every iteration)
    os.makedirs(img_dir, exist_ok=True)

    # Collect the window-start timestamps of charts that already exist for this
    # pair. Files that do not follow the naming scheme are skipped, so a stray
    # file no longer silently triggers a full regeneration.
    name_prefix = f'{pair_tag}_'
    stamp_len   = len('YYYY-MM-DD_HH-MM')
    existing_starts = []
    for file_name in os.listdir(img_dir):
        if not file_name.startswith(name_prefix):
            continue
        stamp = file_name[len(name_prefix):len(name_prefix) + stamp_len]
        try:
            existing_starts.append(datetime.strptime(stamp, time_fmt))
        except ValueError:
            continue

    # restrict to timestamps newer than the latest existing window start (if any)
    candidate_idx = fx_final.index
    if existing_starts:
        candidate_idx = candidate_idx[candidate_idx > max(existing_starts)]

    # pair every timestamp with the one `look_back_time` rows later -> (start, end) of a window
    time_stamps = candidate_idx.to_list()
    data_idx_chuncks = list(zip(time_stamps, time_stamps[look_back_time:]))

    # chart style is identical for every image, so build it once
    chart_style = mpf.make_mpf_style(base_mpf_style='charles'
                                     , gridstyle = 'dashed')

    # generate candle-stick charts
    for window_start, window_end in tqdm.tqdm(data_idx_chuncks):

        # rows for this chart (label-based slice, both ends inclusive)
        temp_df = fx_final.loc[window_start:window_end]

        # signal label = signal on the last candle of the window
        label = temp_df.signal.values[-1]

        # image pre-fix / post-fix time tags
        date_start   = window_start.strftime(time_fmt)
        date_predict = window_end.strftime(time_fmt)

        # path to store image
        image_path = img_dir + f'{pair_tag}_{date_start}_{date_predict}_{label}.jpg'

        # EMA overlays: faint high/low band edges, darker mid line, red 200 EMA
        apds = [mpf.make_addplot(temp_df['ewm_50_h']
                                 , color = 'blue'
                                 , alpha = 0.2),
                mpf.make_addplot(temp_df['ewm_50_m']
                                 , color = 'darkblue'
                                 , alpha = 0.7),
                mpf.make_addplot(temp_df['ewm_50_l']
                                 , color = 'blue'
                                 , alpha = 0.2),
                mpf.make_addplot(temp_df['ewm_200']
                                 , color='red')
               ]

        # passing `savefig` makes mplfinance write the chart to disk
        mpf.plot(temp_df
                 , type = 'candle'
                 , style = chart_style
                 , volume = True
                 , ylabel=''
                 , ylabel_lower=''
                 , figratio = (12,8)
                 , tight_layout = True
                 , addplot = apds
                 , panel_ratios = (6,1)
                 , fill_between = dict(y1 = temp_df['ewm_50_h'].values
                                       , y2 = temp_df['ewm_50_l'].values
                                       , alpha=0.2
                                       , color='b')
                 , scale_width_adjustment = dict(volume=0.7
                                                 , candle=1.2
                                                 , lines=0.3)
                 , savefig=dict(fname=image_path
                                , pad_inches=0.25))
    

In [3]:
# Currency pairs in 'AAA/BBB' notation, in processing order.
curr_pairs = [
    'EUR/USD', 'GBP/USD', 'USD/CHF', 'AUD/USD', 'USD/CAD',
    'NZD/USD', 'EUR/CHF', 'EUR/GBP', 'EUR/AUD', 'EUR/CAD',
]

# Pair handled by this run: the first entry of the list.
curr_pair = curr_pairs[0]

In [None]:
# Render the chart images for the selected pair. This is a long-running cell:
# the saved progress output below shows roughly 1 s per image over ~10k windows.
generate_training_data(curr_pair)

--------------------------
Curr. Pair: EUR-USD

Start-time: 2020-06-25 07:15:00,
End-time: 2020-11-19 13:45:00,

Signal-Breakdown
      signal
HOLD   10225
BUY       10
SELL       7

Dataset-size:(10242, 11)
--------------------------


 50%|████████████████████████████████████▌                                    | 5109/10202 [1:13:58<1:34:46,  1.12s/it]