## Generate signals

In [8]:
# import packages
import pandas as pd
import numpy as np
import tqdm
import os

In [9]:
# FX instruments (written BASE/QUOTE) whose price data is turned into signals.
# The order here is the order in which the pairs are processed.
curr_pairs = [
    # USD majors
    'EUR/USD',
    'GBP/USD',
    'USD/CHF',
    'AUD/USD',
    'USD/CAD',
    'NZD/USD',
    # EUR crosses
    'EUR/CHF',
    'EUR/GBP',
    'EUR/AUD',
    'EUR/CAD',
]

In [21]:
def _buy_checks(candles, min_move, gap_tolerance):
    """Return four booleans (candles t, t+1, t+2, t+3) for the BUY pattern.

    `candles` holds the first five rows of the window in time order:
    candles[0] is t-1, candles[1] is t, candles[2:5] are t+1..t+3.
    """
    past, present = candles[0], candles[1]
    checks = [all([
        past['close'] < past['open'],         # t-1 closed down
        present['close'] > present['open'],   # t closed up
        present['close'] > present['low'],
        present['high'] >= present['close'],
    ])]

    for pos in (2, 3, 4):
        cur, prev = candles[pos], candles[pos - 1]
        conditions = [
            cur['close'] > cur['open'],                   # candle closed up
            cur['close'] > cur['low'],
            cur['high'] >= cur['close'],
            cur['high'] >= prev['high'],                  # higher (or equal) high
            cur['low'] >= prev['low'],                    # higher (or equal) low
            cur['high'] >= prev['close'],
            cur['low'] >= prev['close'] - gap_tolerance,  # no deep dip below previous close
        ]
        if pos == 4:
            # the move from the entry open (t+1) to the exit close (t+3) must be large enough
            conditions.append(candles[4]['close'] - candles[2]['open'] >= min_move)
        checks.append(all(conditions))

    return checks


def _sell_checks(candles, min_move, gap_tolerance):
    """Return four booleans (candles t, t+1, t+2, t+3) for the SELL pattern.

    Mirror image of `_buy_checks`: same candle layout, every inequality flipped
    and the roles of high and low swapped.
    """
    past, present = candles[0], candles[1]
    checks = [all([
        past['close'] > past['open'],         # t-1 closed up
        present['close'] < present['open'],   # t closed down
        present['close'] < present['high'],
        present['low'] <= present['close'],
    ])]

    for pos in (2, 3, 4):
        cur, prev = candles[pos], candles[pos - 1]
        conditions = [
            cur['close'] < cur['open'],                    # candle closed down
            cur['close'] < cur['high'],
            cur['close'] >= cur['low'],
            cur['high'] <= prev['high'],                   # lower (or equal) high
            cur['low'] <= prev['low'],                     # lower (or equal) low
            cur['low'] <= prev['close'],
            cur['high'] <= prev['close'] + gap_tolerance,  # no high spike above previous close
        ]
        if pos == 4:
            # the move from the entry open (t+1) to the exit close (t+3) must be large enough
            conditions.append(candles[2]['open'] - candles[4]['close'] >= min_move)
        checks.append(all(conditions))

    return checks


def create_signal(df, min_move=0.0021, gap_tolerance=0.001):
    """Label the second candle of a window as BUY, SELL or HOLD.

    The window is sorted by index and its first five rows are read as
    t-1, t, t+1, t+2, t+3; any further rows are ignored. The label is attached
    to candle t (row position 1). Because candles after t are inspected, the
    label at t depends on prices that occur later than t.

    BUY requires all four of the following groups to hold (SELL is the exact
    mirror image, with high/low swapped and the inequalities flipped):
      * t-1 closed below its open, and t closed above its open and its low;
      * each of t+1, t+2, t+3 closed above its open and its low, with
        high >= previous high, low >= previous low, high >= previous close
        and low >= previous close - `gap_tolerance`;
      * additionally, close(t+3) - open(t+1) >= `min_move`.

    Parameters
    ----------
    df : pandas.DataFrame
        At least five rows with columns 'open', 'close', 'high', 'low'.
        The frame is not modified.
    min_move : float, default 0.0021
        Minimum price move between open(t+1) and close(t+3) in the trade
        direction.
    gap_tolerance : float, default 0.001
        Allowed excursion of a candle beyond the previous close, against the
        trade direction.

    Returns
    -------
    pandas.DataFrame
        One row (candle t, indexed by its original label) holding the input
        columns plus 'signal' ('BUY' / 'SELL' / 'HOLD') and 'signal_count'
        (4 for BUY/SELL; for HOLD, the larger of the buy and sell group counts).

    Raises
    ------
    ValueError
        If `df` has fewer than five rows.
    """
    # Work on a sorted copy: sorting in place would silently mutate the caller's
    # frame (and warns when the caller passes a slice of a larger frame).
    ordered = df.sort_index()

    if len(ordered) < 5:
        raise ValueError(f'create_signal needs at least 5 candles, got {len(ordered)}')

    candles = [ordered.iloc[pos] for pos in range(5)]

    buy_hits = sum(_buy_checks(candles, min_move, gap_tolerance))
    sell_hits = sum(_sell_checks(candles, min_move, gap_tolerance))

    # single-row frame for candle t, keeping its index label
    labelled = pd.DataFrame(ordered.iloc[1]).T

    if buy_hits == 4:
        labelled['signal'] = 'BUY'
        labelled['signal_count'] = buy_hits
    elif sell_hits == 4:
        labelled['signal'] = 'SELL'
        labelled['signal_count'] = sell_hits
    else:
        labelled['signal'] = 'HOLD'
        labelled['signal_count'] = max(buy_hits, sell_hits)

    return labelled

In [22]:
def generate_signals(overwrite=False):
    """Create or extend the stored signal file of every pair in `curr_pairs`.

    For each pair the function
      1. reads `./data/fx_data/<BASE>_<QUOTE>/fx_data_<BASE>_<QUOTE>.parquet`,
         which must provide a `date` column and bid/ask open, close, high and
         low columns (`bidopen`, `askopen`, ...);
      2. averages bid and ask into mid `open` / `close` / `high` / `low` prices
         and indexes the frame by `date`, sorted ascending;
      3. if a `..._w_sig.parquet` file already exists, keeps only the candles
         from the last stored signal onwards;
      4. slides a 7-candle window over the data and labels each window with
         `create_signal`;
      5. merges new and stored signals, drops duplicate dates (newest wins),
         sorts by date and writes `..._w_sig.parquet`.

    Parameters
    ----------
    overwrite : bool, default False
        When True, an existing signal file is deleted first so that all
        signals are regenerated from the full price history.

    Returns
    -------
    None
        Results are written to disk and a summary is printed per pair.
    """
    # A window is 7 consecutive candles; create_signal reads the first five of
    # them and attaches the label to the second one.
    window = 7

    completed = []
    for curr_pair in curr_pairs:

        print('\n------------------------------------\n'\
              f'Current pair: {curr_pair}'+" || Completed pair(s): "+", ".join(completed), end='\r')

        pair_tag    = f'{curr_pair[:3]}_{curr_pair[4:]}'
        folder_path = f'./data/fx_data/{pair_tag}/'
        price_path  = folder_path + f'fx_data_{pair_tag}.parquet'
        signal_path = folder_path + f'fx_data_{pair_tag}_w_sig.parquet'

        # load data
        fx_data = pd.read_parquet(price_path)

        if overwrite:
            # delete the existing signal file; a missing file is fine, any other
            # failure (e.g. permissions) must surface instead of being hidden
            try:
                os.remove(signal_path)
            except FileNotFoundError:
                pass

        # process data (average bid/ask prices into a single mid price)
        for field in ('open', 'close', 'high', 'low'):
            fx_data[field] = (fx_data[f'bid{field}'] + fx_data[f'ask{field}']) / 2

        # convert dates to date-time
        fx_data['date'] = pd.to_datetime(fx_data['date'])

        # keep only the columns needed, indexed by date in ascending order so
        # that the positional windows below are consecutive in time
        fx_data = (fx_data.loc[:, ['date', 'open', 'close', 'high', 'low']]
                          .set_index('date')
                          .sort_index())

        # Previously stored signals for THIS pair only. The variable is reset on
        # every iteration so a pair without a signal file can never pick up the
        # signals loaded for the pair processed before it.
        prvs_data = None
        if os.path.exists(signal_path):
            prvs_data = pd.read_parquet(signal_path)
            if prvs_data.empty:
                prvs_data = None

        if prvs_data is not None:
            # Keep the last already-labelled candle (>=, not >): it is the t-1
            # context of the first unlabelled candle, which would otherwise
            # never receive a signal.
            fx_data = fx_data.loc[fx_data.index >= prvs_data.index.max()]

        # generate signals for every full window of the selected data
        fx_data_li = [create_signal(fx_data.iloc[start:start + window])
                      for start in range(len(fx_data) - window + 1)]

        # if no new signals were created, move on to the next pair
        if not fx_data_li:
            continue

        fx_data_out = pd.concat(fx_data_li).loc[:, ['signal', 'signal_count']]

        # prepare the final signal dataset
        if prvs_data is not None:
            data_out = pd.concat([prvs_data, fx_data_out])  # extend the stored signals
        else:
            data_out = fx_data_out                          # nothing stored yet

        # drop duplicated dates (the most recent entry wins) and sort by date
        data_out = data_out.loc[~data_out.index.duplicated(keep='last')].sort_index()

        # store new data | index include dates
        data_out.to_parquet(signal_path, index = True)

        # Name list to print
        completed.append(curr_pair)

        # print status
        print('\n------------------------------------\n'\
              f'Curr. Pair: {curr_pair[:3]}-{curr_pair[4:]}\n\n' \
              f'Start-time: {data_out.index.min()},\nEnd-time: {data_out.index.max()},\n\n' \
              f'Signal-Breakdown\n{pd.DataFrame(data_out.signal.value_counts())}\n\n' \
              f'Dataset-size:{data_out.shape}\n'\
               '------------------------------------',end='\r')

    print('\nEXECUTION COMPLETE!' , end='\r')

In [23]:
# Rebuild the signal file of every pair from scratch: overwrite=True removes any
# stored *_w_sig.parquet file before the signals are regenerated.
generate_signals(overwrite=True)


------------------------------------
Current pair: EUR/USD || Completed pair(s): 
------------------------------------
Curr. Pair: EUR-USD

Start-time: 2020-06-25 07:15:00,
End-time: 2020-11-20 18:15:00,

Signal-Breakdown
      signal
HOLD   10326
SELL      17
BUY       15

Dataset-size:(10358, 2)
--------------------------
------------------------------------
Current pair: GBP/USD || Completed pair(s): EUR/USD
------------------------------------
Curr. Pair: GBP-USD

Start-time: 2020-06-24 21:30:00,
End-time: 2020-11-20 18:15:00,

Signal-Breakdown
      signal
HOLD   10271
SELL      45
BUY       42

Dataset-size:(10358, 2)
--------------------------
------------------------------------
Current pair: USD/CHF || Completed pair(s): EUR/USD, GBP/USD
------------------------------------
Curr. Pair: USD-CHF

Start-time: 2020-06-24 20:15:00,
End-time: 2020-11-20 18:15:00,

Signal-Breakdown
      signal
HOLD   10341
BUY        9
SELL       8

Dataset-size:(10358, 2)
-------------------------