In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from   IPython.display import display, HTML
from collections import deque
import random
import os
import sys
import pathlib
from collections import deque
from tqdm.notebook import tqdm, trange
import datetime
from scipy import interpolate
import math

import matplotlib.ticker as mtick
import matplotlib.dates as md

from IPython.display import clear_output

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import gpytorch
import matplotlib.dates as md

In [None]:
# Make the repository root (the parent of the notebook's working directory)
# importable so that the `AOE` package can be found by the cells below.
parent_module_path = os.path.abspath('..')
if sys.path.count(parent_module_path) == 0:
    sys.path.append(parent_module_path)

In [None]:
# pandas: widen the console representation and silence the chained-assignment
# (SettingWithCopy) warning for the rest of the notebook.
pd.options.display.width = 1000
pd.options.mode.chained_assignment = None

# Inject CSS so that the notebook's `.container` element uses the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Load Data & Signals

In [None]:
from AOE.utils import get_LOB_data, get_LOB_features, get_meta_order_df, verbose_print
from AOE.plots import rescale_plot
from AOE.plots import hit_ratio_analysis, reward_distribution_analysis, regret_plots, analyze_meta_order

In [None]:
# Data folders, all resolved relative to the parent of the current working directory.
data_path    = pathlib.Path(os.path.abspath('..')) / "data" / "Market"
reward_path  = pathlib.Path(os.path.abspath('..')) / "data" / "Rewards"
feature_path = pathlib.Path(os.path.abspath('..')) / "data" / "Features"

# Instrument settings.
asset_name = "MSFT"
tick_size  = 0.001

In [None]:
# Set to True to rebuild the per-day feature pickles from the raw LOB files;
# when False, the pickles cached by a previous run are loaded instead.
recompute_features = False

# Days of June 2022 to load. Further days that were listed for this loop:
# '03', '06', '07', '08', '09', '10', '13', '14', '16', '17',
# '21', '22', '23', '24', '27', '28', '29', '30'
trade_days = ('01', '02')

# One down-sampled feature frame per day; concatenated once after the loop
# instead of growing `all_data` with a `pd.concat` on every iteration.
daily_features = []

for trade_day in trade_days:
    # NOTE: `trade_date` keeps the value of the last loaded day after this loop
    # and is later passed to `execute_and_obtain_rewards` in the simulation cell.
    trade_date = f'2022-06-{trade_day}'
    print('Reading', trade_date)

    if recompute_features:
        LOB_data, LOB_messages = get_LOB_data(data_path, asset_name, trade_date)
        LOB_features = get_LOB_features(LOB_data,
                                        trade_date = trade_date,
                                        trend      = ({'w' : 25},),
                                        vol        = ({'w' : 500}, {'w' : 1000},
                                                      {'w' : 5000}, {'w' : 20000}),
                                        depth      = {'w' : 500},
                                        LOB_msg    = LOB_messages)
        LOB_features.to_pickle(f'../data/Features/LOB_features_{trade_date}.pkl')
    else:
        # Pickle files can execute arbitrary code on load: only read files
        # produced by this project.
        LOB_features = pd.read_pickle(f'../data/Features/LOB_features_{trade_date}.pkl')

    # keep one row out of every 10 and index the frame by its 'timestamp' column
    LOB_features = LOB_features.iloc[::10, :].set_index('timestamp')
    daily_features.append(LOB_features)

# `all_data` stays None when no day was loaded, as before.
all_data = pd.concat(daily_features) if daily_features else None

# Optimal Strategies & Trading parameters

In [None]:
# 'LT' signal: the 'rsi_5_10000' column centred at 50, sign-flipped and divided by 6.
# The column is added to `all_data` in place.
all_data['LT'] = all_data['rsi_5_10000'].sub(50).mul(-1).div(6)

# Working frame for the rest of the notebook: rows without any NaN, ordered by index.
LOB_features = (
    all_data
    .dropna()
    .sort_index()
)

In [None]:
# Candidate execution strategies, keyed by the label used throughout the notebook.
# Every entry shares the same numeric parameters and the same contextual feature;
# entries only differ by the strategy `name` and the signal (`feature_name`) they use.
# NOTE(review): the 'trend' entry uses name 'imbalance' with the 'LT' feature --
# presumably intentional (same strategy, different signal); confirm.
strategies = {
    label: {
        'name'             : algo_name,
        'params'           : {'feature_name'     : signal_name,
                              'estimation_period': '1min',
                              'use_interpolator' : False,
                              'alpha'            : 10,
                              'phi'              : 0.001,
                              'kappa'            : 9e-10},
        'contextual_params': {'feature_name': 'vol_50_50000'},
    }
    for label, algo_name, signal_name in (
        ('imbalance', 'imbalance', 'imbalance_1'),
        ('trend',     'imbalance', 'LT'),
        ('twap',      'TWAP',      'imbalance_3'),
    )
}

In [None]:
# Order-flow and execution settings used by the trading simulation.
arrival_intensity = 0.001    # Poisson mean of order arrivals per candidate timestamp (trade time)
meta_order_size   = 100      # fixed size of every meta order
latency           = 0        # TODO: implement this
T                 = 10 * 60  # trading window of every order, in seconds
trading_frequency = 3        # in seconds

# GP bandits

In [None]:
from AOE.gp_bandit import gp_bandit

In [None]:
# Likelihood model and buffer sizes
# NOTE(review): the same GaussianLikelihood instance is handed to both bandits;
# whether gp_bandit copies it internally is not visible here -- confirm.
likelihood              = gpytorch.likelihoods.GaussianLikelihood()
size_buffer             = 100      # number of rewards to sample from
size_buffer_nts         = 10 * 60  # in seconds of observation
nb_intermediary_rewards = 5

# One GP bandit per selection rule: Thompson sampling ('TS') and UCB with parameter 0.1.
bandits = {
    'TS' : gp_bandit(strategies, likelihood, size_buffer,
                     bandit_algo='TS'),
    'UCB': gp_bandit(strategies, likelihood, size_buffer,
                     bandit_algo='UCB', bandit_params=0.1),
}

# Other control parameters and variables

In [None]:
# ---------------------------------------------------------------------------
# Bandit bookkeeping (one entry per bandit name; 'oracle' is tracked separately)
# ---------------------------------------------------------------------------
# Rewards registered when an order arrives and consumed later by the simulation
# loop (final rewards, plus intermediary rewards for the bandits only).
pending_rewards           = {bandit_k: {} for bandit_k in bandits.keys()}
pending_rewards['oracle'] = {}
pending_int_rewards       = {bandit_k: {} for bandit_k in bandits.keys()}

# Histories filled by the update helpers during the simulation and read by the
# analysis cells at the end of the notebook.
historical_rewards        = {bandit_k: [] for bandit_k in bandits.keys()}
historical_strats         = {bandit_k: [] for bandit_k in bandits.keys()}
historical_oracle_rewards = []
historical_oracle_strats  = []
historical_all_rewards    = []
historical_reward_times   = []

# Per-bandit counter; the loop picks strategies at random until it reaches 5.
nb_added_rewards          = {bandit_k: 0 for bandit_k in bandits.keys()}

# ---------------------------------------------------------------------------
# Trading bookkeeping
# ---------------------------------------------------------------------------
tape_meta_orders          = deque(maxlen=None)  # unbounded tape of meta order objects
meta_order_id_c           = 0                   # counter to flag meta orders with IDs
order_id_c                = 0                   # counter to flag orders with IDs
i_order                   = 0                   # number of meta orders received so far

# Candidate arrival times: the index value of every 5th row of LOB_features.
# (Previously this name and `tape_meta_orders` were each assigned twice in this
# cell; only the effective, final assignments are kept.)
order_arrival_times       = LOB_features.dropna().iloc[::5, :].index.values

In [None]:
# Verbosity level forwarded to verbose_print and to the reward/bandit/execution
# helpers called in the simulation loop.
verbose_level             = 1

# Trading simulation

In [None]:
from AOE.order import order
from AOE.meta_order import meta_order
from AOE.strategy import strategy

from AOE.stats import get_meta_order_df
from AOE.stats import get_meta_order_details
from AOE.plots import plot_meta_order

from AOE.optimal_trading import execute_strategy
from AOE.bandit_utils import update_reward_variables, update_bandit_variables
from AOE.bandit_utils import pop_from_dict, execute_and_obtain_rewards, update_pending_rewards

In [None]:
# Pre-draw, for every candidate timestamp, a Poisson count with mean
# `arrival_intensity`; a strictly positive draw means an order arrives there.
# NOTE(review): no random seed is set in the visible notebook (numpy and
# `random` are both used below), so runs are not reproducible -- consider
# seeding in a configuration cell.
true_arrival_time_indices = np.random.poisson(arrival_intensity, size=len(order_arrival_times))

for (i_time, order_arrival_time) in enumerate(order_arrival_times):

    ###############################################
    # retrain GPs hyperparameters every N orders
    ###############################################
    # `i_order` only changes when an order arrives, so this flag stays True on
    # every timestamp for as long as the order count is a multiple of 10.
    retrain_hyperparameters = (i_order % 10 == 0)

    ##################################################
    # add pending delayed rewards to bandit objects
    ##################################################
    to_pops = update_reward_variables(order_arrival_time,
                                      historical_oracle_rewards, historical_oracle_strats, historical_all_rewards, historical_reward_times,
                                      pending_rewards, verbose_level)

    # Sanity check: every bandit must have recorded the same number of rewards.
    # Raise an explicit error (instead of triggering a NameError on purpose) and
    # cover all configured bandits rather than only 'TS' and 'UCB'.
    reward_counts = {bandit_k: len(historical_rewards[bandit_k]) for bandit_k in bandits.keys()}
    if len(set(reward_counts.values())) > 1:
        raise RuntimeError(f'Bandit reward histories are out of sync: {reward_counts}')

    pop_from_dict(pending_rewards['oracle'], to_pops)

    for bandit_k in bandits.keys():
        to_pops_int, to_pops = update_bandit_variables(pending_int_rewards, pending_rewards, bandits, bandit_k,
                                                       order_arrival_time,
                                                       verbose_level,
                                                       retrain_hyperparameters, historical_rewards, historical_strats,
                                                       nb_added_rewards)
        pop_from_dict(pending_rewards[bandit_k], to_pops)
        pop_from_dict(pending_int_rewards[bandit_k], to_pops_int)

    ##################################################
    # if an order arrives: add intermediary
    # and final rewards to pending list
    ##################################################
    if true_arrival_time_indices[i_time] > 0:
        # randomize buys and sells: buysell is -1 or +1
        buysell = 2*np.random.randint(0, 2, size=None, dtype=int)-1

        verbose_print(verbose_level, order_arrival_time, f'I received an order at {order_arrival_time} with quantity {buysell*meta_order_size}', True)
        i_order          += 1

        ######################
        # get feature values
        ######################
        # first row at or after the arrival time; remaining NaNs are replaced by 0
        feature_values = LOB_features.loc[order_arrival_time:].iloc[0,:].fillna(0.) # fillna should be controlled here ..

        ######################################
        # select the strategy for all bandits
        ######################################
        verbose_print(verbose_level, order_arrival_time, f'Selecting strategies ...')

        best_strategies_bandits = {}
        for bandit_k in bandits.keys():
            if nb_added_rewards[bandit_k] < 5: # at least 5 rewards before sampling
                best_strategies_bandits[bandit_k]  = random.choice(list(strategies.keys()))
            else:
                best_strategies_bandits[bandit_k]  = bandits[bandit_k].select_best_strategy(feature_values)

        verbose_print(verbose_level, order_arrival_time, f'Selected strategies: {best_strategies_bandits}')

        ######################################
        # Execute all available strategies
        ######################################
        # NOTE(review): `trade_date` is whatever the data-loading cell left behind
        # (its last loaded day), even when the order arrives on another day --
        # confirm how execute_and_obtain_rewards uses it.
        order_id_c, meta_order_id_c, reward_info, all_strats_rewards, \
        best_oracle_reward, best_oracle_strategy = execute_and_obtain_rewards(tape_meta_orders, order_id_c, meta_order_id_c, strategies,
                                                                              LOB_features, order_arrival_time,
                                                                              T, trading_frequency, buysell*meta_order_size,
                                                                              latency, trade_date, verbose_level, nb_intermediary_rewards)

        ######################################
        # add intermediary and final rewards of
        # selected strategies of bandits
        ######################################
        update_pending_rewards(pending_int_rewards, pending_rewards,
                               bandits, best_strategies_bandits, reward_info,
                               feature_values, strategies,
                               best_oracle_strategy, best_oracle_reward, order_arrival_time, all_strats_rewards)

        verbose_print(verbose_level, order_arrival_time, f"Oracle strategy: {best_oracle_strategy}")

    # clear the cell output (deferred until new output arrives) whenever the
    # order count is a multiple of 3
    if i_order % 3 == 0: clear_output(wait=True)

# Regret analysis

In [None]:
# Build two frames indexed by reward time: realised rewards, and regrets
# (oracle reward minus realised reward), with one column per bandit followed
# by one column per individual strategy.
rewards_to_plot = pd.DataFrame(index=historical_reward_times)
regrets_to_plot = pd.DataFrame(index=historical_reward_times)

# regrets: bandits first, then every strategy (column i of historical_all_rewards)
for bandit_k in bandits.keys():
    regrets_to_plot[bandit_k] = np.array(historical_oracle_rewards) - np.array(historical_rewards[bandit_k])
for i_strat, strat in enumerate(strategies.keys()):
    regrets_to_plot[strat] = np.array(historical_oracle_rewards) - np.array(historical_all_rewards)[:, i_strat]

# rewards: same column order as the regrets
for bandit_k in bandits.keys():
    rewards_to_plot[bandit_k] = np.array(historical_rewards[bandit_k])
for i_strat, strat in enumerate(strategies.keys()):
    rewards_to_plot[strat] = np.array(historical_all_rewards)[:, i_strat]

# order both frames chronologically
regrets_to_plot = regrets_to_plot.sort_index()
rewards_to_plot = rewards_to_plot.sort_index()

### Hit ratio

In [None]:
# Hit-ratio analysis: the strategies picked by each bandit against the oracle's picks.
# (`_W` is presumably a figure-size setting of the AOE.plots helper -- confirm.)
hit_ratio_analysis(
    historical_strats,
    bandits,
    historical_oracle_strats,
    _W=5.5,
)

### Reward distributions

In [None]:
# Overlay the reward histograms of both bandits and of the 'imbalance' strategy.
for column_name, bar_color in (('TS', 'b'), ('UCB', 'g'), ('imbalance', 'grey')):
    rewards_to_plot[column_name].hist(bins=100, color=bar_color, alpha=0.7)

# Cap the y-axis at 10 counts and label the series in plotting order.
plt.ylim(0, 10)
plt.legend(['TS', 'UCB', 'imbalance'])

### Reward distributions in time

In [None]:
# Settings for the reward-distribution-in-time analysis of a single bandit.
period_est  = 5     # in number of rewards, can be "10min"
bandit_name = 'TS'  # which bandit to analyse

reward_distribution_analysis(
    bandit_name,
    bandits,
    period_est,
    LOB_features,
    rewards_to_plot,
    historical_reward_times,
    historical_strats,
    historical_oracle_strats,
    strategies,
    W=7,
    figure_name=None,
)

### Regret plots

In [None]:
# Regret plots for the bandits and the individual strategies, built from the
# `regrets_to_plot` frame; no figure name is passed.
regret_plots(
    strategies,
    historical_reward_times,
    regrets_to_plot,
    LOB_features,
    bandits,
    W=5.5,
    figure_name=None,
)

### Analyze a given specific order

In [None]:
# Inspect one meta order from the tape.
# NOTE(review): the id is hard-coded; presumably it must exist in
# `tape_meta_orders` for the run being analysed -- confirm.
meta_order_id = 12
analyze_meta_order(
    meta_order_id,
    tape_meta_orders,
    W=5.8,
    figure_name=None,
)

### GP plot

In [None]:
# Plot the GP fits of one bandit over the observed range of its contextual feature.
bandit_name  = 'TS'
feature_name = 'vol_50_50000'

# lower / upper bounds: smallest and largest observed value of the feature
bandits[bandit_name].plot_fit_all(
    lv=LOB_features[feature_name].min(),
    uv=LOB_features[feature_name].max(),
    plot_path=None,
    W=9,
)