In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from   IPython.display import display, HTML
import pprint
import datetime
import matplotlib.dates as md
from pathlib import Path
import os
import math
from scipy.stats import poisson
from scipy import interpolate
from collections import deque
import matplotlib.dates as md
from IPython.display import clear_output
from scipy.interpolate import interpn
from scipy.interpolate import RegularGridInterpolator
import sys
import pathlib

In [2]:
import statsmodels.api as sm
import scipy.integrate as integrate

In [3]:
import matplotlib.ticker as mtick

# Notebook display settings: wide pandas output, chained-assignment warnings
# switched off, and a CSS override that stretches the notebook container.
pd.set_option('display.width', 1000)
pd.set_option('mode.chained_assignment', None)
display(HTML("<style>.container { width:100% !important; }</style>"))

# Reusable matplotlib tick formatters.
fmt     = '${x:,.2f}'
tick    = mtick.StrMethodFormatter(fmt)          # dollars, two decimals
tick2   = mtick.StrMethodFormatter('${x:,.0f}')  # dollars, no decimals
normal  = mtick.StrMethodFormatter('{x:,.0f}')   # plain number, no decimals
normal2 = mtick.StrMethodFormatter('{x:,.2f}')   # plain number, two decimals
percc   = mtick.StrMethodFormatter('{x:,.0%}')   # percentage, no decimals

In [4]:
# Put the parent of the current working directory on the import path (once),
# so packages living next to the notebook folder can be imported.
parent_module_path = os.path.abspath('..')
if sys.path.count(parent_module_path) == 0:
    sys.path.append(parent_module_path)

In [5]:
# Shared plotting palette and text styles.
colors = dict(
    red='#ff207c',
    grey='#42535b',
    blue='#207cff',
    orange='#ffa320',
    green='#00ec8b',
)
_grey = colors['grey']
# Keyword bundles for tick and title styling; both use the grey of the palette.
config_ticks = dict(size=14, color=_grey, labelcolor=_grey)
config_title = dict(size=18, color=_grey, ha='left', va='baseline')

In [6]:
def ema(series, periods, fillna=False):
    """Exponentially weighted moving average of ``series``.

    Args:
        series(pandas.Series): values to smooth.
        periods(int): span of the exponential window.
        fillna(bool): if True, emit a value from the first observation on
            (``min_periods=0``); otherwise require ``periods`` observations
            before the first non-NaN output.

    Returns:
        pandas.Series: the smoothed series.
    """
    required_obs = 0 if fillna else periods
    return series.ewm(span=periods, min_periods=required_obs).mean()

def rsi(close, n=14, rtrfreq=1, fillna=False):
    """Relative Strength Index built from EMA-smoothed gains and losses.

    Args:
        close(pandas.Series): price series.
        n(int): EMA span applied to gains and losses (original note: n in
            number of trades).
        rtrfreq(int): lag, in rows, used to difference ``close``.
        fillna(bool): if True, the EMAs start from the first observation and
            undefined / infinite values of the result are replaced by 50.

    Returns:
        pandas.Series: series named 'rsi'.
    """
    change  = close.diff(rtrfreq)
    falling = change < 0

    # Gains keep the non-negative moves; losses hold the magnitude of the
    # negative moves. NaN rows of the difference stay NaN in both.
    gains  = change.mask(falling, 0)
    losses = (change * 0).mask(falling, -change)

    avg_gain = ema(gains, n, fillna)
    avg_loss = ema(losses, n, fillna)

    strength = 100 * avg_gain / (avg_gain + avg_loss)
    if fillna:
        strength = strength.replace([np.inf, -np.inf], np.nan).fillna(50)
    return pd.Series(strength, name='rsi')

def macd_signal(close, n_fast=12, n_slow=26, n_sign=9, fillna=False):
    """MACD line minus its signal line.

    Despite the function name, the value returned is the difference between
    the MACD line (fast EMA minus slow EMA) and the signal line (EMA of the
    MACD line), not the signal line itself.

    Args:
        close(pandas.Series): dataset 'Close' column.
        n_fast(int): n period short-term.
        n_slow(int): n period long-term.
        n_sign(int): n period to signal.
        fillna(bool): if True, the EMAs start from the first observation and
            NaN / infinite values of the result are replaced by 0.

    Returns:
        pandas.Series: series named 'MACD_diff'.
    """
    emafast = ema(close, n_fast, fillna)
    emaslow = ema(close, n_slow, fillna)
    macd = emafast - emaslow
    signal_line = ema(macd, n_sign, fillna)

    macd_diff = macd - signal_line
    if fillna:
        # Clean the series that is actually returned; the cleanup used to be
        # applied to the signal line after it had already been consumed, so
        # it never reached the caller.
        macd_diff = macd_diff.replace([np.inf, -np.inf], np.nan).fillna(0)
    return pd.Series(macd_diff, name='MACD_diff')

# Reward construction

In [11]:
def get_rewards(_LOB_features, _signal_names, _verbose, _freq= 's'):
    """Build per-bucket execution prices for a set of trading signals.

    The frame is walked in steps of one ``_freq`` unit measured from its first
    timestamp. For step ``i`` the batch holds the rows that are strictly
    earlier than ``first timestamp + i * _freq`` and strictly later than one
    ``_freq`` unit before the last of those rows. This reproduces
    ``_LOB_features.first(f"{i}{_freq}").last(f"1{_freq}")`` for fixed-length
    frequencies without calling ``DataFrame.first`` / ``DataFrame.last``,
    which are deprecated since pandas 2.1.

    For every batch the first and last ``mid_price`` (``S0`` / ``ST``) and the
    mean ``mid_price`` are recorded. For every signal, its value on the first
    row of the batch decides the direction: positive buys at ``S0`` and sells
    at ``ST``, negative buys at ``ST`` and sells at ``S0``, anything else
    (zero or NaN) leaves both prices ``None``. Signals whose name contains
    'MR' have their sign flipped first.

    Args:
        _LOB_features(pandas.DataFrame): frame with a 'mid_price' column and
            one column per signal; the slicing assumes a sorted datetime
            index.
        _signal_names(list[str]): signal columns to evaluate.
        _verbose(bool): print per-batch and per-signal details.
        _freq(str): frequency alias of one bucket (default 's').

    Returns:
        dict: maps the first timestamp of each batch (``numpy.datetime64``)
        to ``{'S0', 'ST', 'Twap_executionPrice', <signal>: {'executionPrice_buy',
        'executionPrice_sell'}}``. Empty when ``_signal_names`` is empty.
    """
    to_offset  = pd.tseries.frequencies.to_offset
    index      = _LOB_features.index
    one_bucket = to_offset(f"1{_freq}")

    nb_seconds = len(_LOB_features.resample(f"1{_freq}"))
    res        = {}
    print(f"number {_freq}:", nb_seconds)

    # NOTE(review): the upper bound excludes step `nb_seconds`, so rows at the
    # very end of the frame may never be part of a batch -- kept as in the
    # original; confirm whether that is intended.
    for _i_second in range(1, nb_seconds):
        # Rows strictly before `first timestamp + i * _freq` ...
        cutoff = index[0] + to_offset(f"{_i_second}{_freq}")
        stop   = index.searchsorted(cutoff, side="left")
        # ... restricted to the last `_freq` unit ending at the last such row.
        start  = index.searchsorted(index[stop - 1] - one_bucket, side="right")
        data_batch = _LOB_features.iloc[start:stop]

        if _verbose: print('\n**** ', data_batch.index[0])

        # No signals: nothing is recorded for this bucket.
        if not _signal_names:
            continue

        # Batch-level quantities are the same for every signal: compute once.
        first_row = data_batch.iloc[0]
        S0        = first_row.mid_price
        ST        = data_batch.iloc[-1].mid_price
        tmp_res   = {'S0': S0,
                     'ST': ST,
                     'Twap_executionPrice': data_batch.mid_price.mean()}

        for signal_name in _signal_names:
            It = first_row[signal_name]

            # Signals with 'MR' in their name trade against the indicator.
            if 'MR' in signal_name:
                It = -It

            if It > 0:
                executionPrice_buy, executionPrice_sell = S0, ST
            elif It < 0:
                executionPrice_buy, executionPrice_sell = ST, S0
            else:
                # Zero or NaN signal: no trade.
                executionPrice_buy, executionPrice_sell = None, None

            tmp_res[signal_name] = {'executionPrice_buy' : executionPrice_buy,
                                    'executionPrice_sell' : executionPrice_sell
                                    }

            if _verbose:
                print('** Signal name=', signal_name,
                      ', executionPrice_buy=', executionPrice_buy,
                      ', executionPrice_sell=', executionPrice_sell,
                      ', S0=', S0,
                      ', ST=', ST)

        res[data_batch.index.values[0]] = tmp_res
    return res

In [12]:
def export_reward_results(_signal_names, _LOB_features, _trade_date, _res, _reward_path, _asset_name):
    """Flatten the reward dictionary into a table and pickle it.

    The table has one row per key of ``_res`` and, in this order, the columns
    ``buyPrice_<signal>`` for every signal, ``sellPrice_<signal>`` for every
    signal, ``twapPrice``, ``S0``, ``ST``, followed by the columns of
    ``_LOB_features`` looked up at the same keys. It is written to
    ``<_reward_path>/<_asset_name>_<_trade_date>_rewards.pkl``.

    Args:
        _signal_names(list[str]): signals to export.
        _LOB_features(pandas.DataFrame): feature frame indexed by the keys
            used in ``_res``.
        _trade_date(str): date label used in the file name.
        _res(dict): per-key entries holding 'S0', 'ST', 'Twap_executionPrice'
            and, per signal, a dict with 'executionPrice_buy' and
            'executionPrice_sell'.
        _reward_path(str or path-like): output directory.
        _asset_name(str): asset label used in the file name.

    Returns:
        None. The result is written to disk.
    """
    keys = list(_res.keys())

    buy_columns, sell_columns = {}, {}
    for signal_name in _signal_names:
        buyPrices, sellPrices = [], []
        for key_ in keys:
            # Read both prices before touching the lists: a failure on the
            # sell side must not leave an extra entry on the buy side, which
            # would make the two columns differ in length.
            try:
                entry     = _res[key_][signal_name]
                buy, sell = entry['executionPrice_buy'], entry['executionPrice_sell']
            except (KeyError, TypeError):
                # Signal (or one of its prices) not available for this key.
                buy, sell = np.nan, np.nan
            buyPrices.append(buy)
            sellPrices.append(sell)
        buy_columns[f'buyPrice_{signal_name}']   = buyPrices
        sell_columns[f'sellPrice_{signal_name}'] = sellPrices

    # All buy columns first, then all sell columns.
    df_pnls = pd.DataFrame({**buy_columns, **sell_columns}, index=keys)

    df_pnls['twapPrice'] = [_res[key_]['Twap_executionPrice'] for key_ in keys]
    df_pnls['S0']        = [_res[key_]['S0'] for key_ in keys]
    df_pnls['ST']        = [_res[key_]['ST'] for key_ in keys]

    pnl_features = _LOB_features.loc[df_pnls.index]

    output_file = os.path.join(_reward_path, f'{_asset_name}_{_trade_date}_rewards.pkl')
    pd.concat((df_pnls, pnl_features), axis=1).to_pickle(output_file)

In [13]:
# Input / output folders, all located under "<parent of the working directory>/data".
data_path, feature_path, reward_path = (
    pathlib.Path(os.path.abspath(os.path.join('..')), "data", folder)
    for folder in ("Market", "Features", "Rewards")
)

# Full list of assets, immediately narrowed to the one processed in this run.
asset_names = ("BIDU", "COST", "CSCO", "GILD", "DELL", "GOOG", "INTC")
asset_names = ("GOOG",)

# Main script

In [14]:
from AOE.utils import get_LOB_data, verbose_print, get_features

In [15]:

# Run settings shared by every asset.
signal_names     = ["imbalance_1", "MR", "TF", "MR2", "s_imbalance_1", "s_MR", "s_TF", "s_MR2"]
verbose          = False
tick_size        = 0.01
smoothing_window = 10   # rows averaged to build the smoothed "s_*" signals

for asset_name in asset_names:
    # Sort the listing so days are processed in a reproducible order
    # (os.listdir returns entries in arbitrary order).
    list_files = sorted(os.listdir(feature_path))

    # Select on the file name only, so that directory names cannot match
    # 'features' or the asset name by accident.
    feature_files = [name for name in list_files
                     if 'features' in name
                     and asset_name in name
                     and os.path.isfile(os.path.join(feature_path, name))]
    len_list_files_an = len(feature_files)

    for i_file, filename in enumerate(feature_files, start=1):
        f = os.path.join(feature_path, filename)
        # The date is the second '_'-separated field of the file name; parsing
        # the full path would break as soon as a parent folder contains '_'.
        trade_date = filename.split('_')[1]

        clear_output(wait=True)
        print('*****************************************')
        print('*     ', asset_name)
        print('*****************************************')
        print('trade_date:', trade_date)
        print(f'{ round(100*i_file/ len_list_files_an)} %')

        # NOTE: unpickling can execute arbitrary code -- only load feature
        # files that were produced by this project.
        LOB_features = pd.read_pickle(f).set_index('timestamp')

        # 'TF' and 'MR' share the same indicator; get_rewards flips the sign
        # of every signal whose name contains 'MR'.
        LOB_features['TF']  = LOB_features['macd_12_26_9']
        LOB_features['MR']  = LOB_features['macd_12_26_9']
        LOB_features['MR2'] = LOB_features['macd_8_24_9']

        # Smoothed variants: rolling mean over `smoothing_window` rows.
        for base_signal in ('imbalance_1', 'TF', 'MR', 'MR2'):
            LOB_features[f's_{base_signal}'] = LOB_features[base_signal].rolling(smoothing_window).mean()

        res = get_rewards(LOB_features, signal_names, verbose)
        export_reward_results(signal_names, LOB_features, trade_date, res, reward_path, asset_name)

*****************************************
*      GOOG
*****************************************
trade_date: 2022-11-09
100 %
number s: 23400
