# Day 02.12.2021 Simulation

In [3]:
import os

import numpy as np
import pandas as pd

from data_tools.simulation_data import SimulationDataCreator
from data_tools.simulation_run import RunSimulation
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 40)
pd.set_option("display.precision", 5)

In [4]:
# Upload, clean and prepare the data.
# The creator's configuration file can be overridden with the
# SIMULATION_DATA_CREATOR_CONFIG environment variable; when it is not set,
# the original location is used.
SIMULATION_DATA_CREATOR_CONFIG = os.environ.get(
    "SIMULATION_DATA_CREATOR_CONFIG",
    "/Users/tom/dev/bbp-research/configuration_files/simulation_data_creator.json",
)
# Names of the market tables handled by this notebook.
MARKET_NAMES = ["market_data", "market_data_1", "view_market_data"]

simulation_data_creator = SimulationDataCreator(SIMULATION_DATA_CREATOR_CONFIG)
# upload the data (a fresh list is passed to each call, as before)
data_dict = simulation_data_creator.upload_data(list(MARKET_NAMES))
# convert invoker column
data_dict = simulation_data_creator.convert_invokers(data_dict, list(MARKET_NAMES))
# merge the three markets on "timestamp", one pair at a time
merged_data = simulation_data_creator.merge_markets(
    data_dict["market_data"]["data"],
    data_dict["market_data_1"]["data"],
    "timestamp",
    True,
)
merged_data = simulation_data_creator.merge_markets(
    merged_data,
    data_dict["view_market_data"]["data"],
    "timestamp",
    True,
)
# add time columns to the merged frame
merged_data = simulation_data_creator.add_time(merged_data, "timestamp")
# per-market alternative that is currently disabled:
# data_dict = simulation_data_creator.add_times(data_dict, ["market_data", "market_data_1", "view_market_data"])

the value counts of column invoker is:

FCI_offer_0    720757
FCI_bid_0      683806
IBI_bid_0      294108
IBI_offer_0    294108
TA1_offer_0    183453
TA1_bid_0      183452
EBS_offer_0    170324
EBS_bid_0      170324
IDB_offer_0    154616
IDB_bid_0      154616
dtype: int64
1705224, 1705224
the value counts of column invoker is:

CITIQ_BARAK_offer_0       44581
CITIQ_BARAK_bid_0         44534
CREDITQ_BARAK_offer_0     34838
CREDITQ_BARAK_bid_0       34831
JPQ_BARAK_bid_0           32918
JPQ_BARAK_offer_0         30401
UBSQ_BARAK_offer_0        23504
UBSQ_BARAK_bid_0          23481
GOLDMANQ_BARAK_bid_0      21705
GOLDMANQ_BARAK_offer_0    21661
dtype: int64
285184, 285184
the value counts of column invoker is:

TRADAIR_LP12_offer_0    218767
TRADAIR_LP12_bid_0      218767
TRADAIR_LP1_offer_0     154098
TRADAIR_LP1_bid_0       154098
TRADAIR_LP15_bid_0      133454
TRADAIR_LP15_offer_0    133454
TRADAIR_LP3_bid_0       118637
TRADAIR_LP3_offer_0     118637
TRADAIR_LP6_bid_0       116233
TRA

In [None]:
# Row count of each uploaded market table, in upload order.
for market_name in ("market_data", "market_data_1", "view_market_data"):
    print(data_dict[market_name]["data"].shape[0])

In [None]:
# Brokers recorded for each uploaded market table, in upload order.
for market_name in ("market_data", "market_data_1", "view_market_data"):
    print(data_dict[market_name]["brokers"])

## arena 1 simulation
- entering position condition: spread_tag_inc(TA1_offer, IBI_bid) < 0
- exiting position condition: current_best_bid >= best_offer_at_signal + 0.00005 or current_best_bid < best_bid_at_signal - 0.00005
- hit_brokers = ['FCI', 'IBI', 'TA1', 'IDB']
- view_brokers = none
- positon_fee = 0.00001
- max time in position = false

In [5]:
class RunSimulation():
    """Row-by-row position simulation over a merged market-data frame.

    The instance keeps the simulation settings, the column layout of the
    prepared data and one list per summary field; every documented position
    appends one value to each of those lists.

    NOTE: this definition shadows the ``RunSimulation`` imported from
    ``data_tools.simulation_run`` at the top of the notebook.
    """

    def __init__(self, simulation_name, hit_brokers, view_brokers, enter_conditions, exit_conditions, positon_fee, max_time_in_position):
        """Store the simulation settings and start with an empty summary.

        Args:
            simulation_name: label of the simulation (stored only).
            hit_brokers: broker prefixes whose ``<broker>_offer_0_rate`` /
                ``<broker>_bid_0_rate`` columns are used for the best rates.
            view_brokers: stored only; not read by any method of this class.
            enter_conditions: textual description of the entry rule (stored only).
            exit_conditions: textual description of the exit rule (stored only).
            positon_fee: amount subtracted from each position outcome to get
                the position profit.
            max_time_in_position: stored only; not read by any method of this class.
        """
        self.simulation_name = simulation_name
        self.hit_brokers = hit_brokers
        self.view_brokers = view_brokers
        self.enter_conditions = enter_conditions
        self.exit_conditions = exit_conditions
        self.positon_fee = positon_fee
        self.max_time_in_position = max_time_in_position
        # Create the summary lists up front so that documenting a position or
        # building the summary never fails with AttributeError when
        # initialize_summary_df was skipped or called with an unsupported type.
        self._reset_summary_lists()

    def _reset_summary_lists(self):
        """Replace every per-position summary list with a new empty list."""
        self.signal_value = []

        self.timestamp_at_entering = []
        self.hour_at_entering = []
        self.best_offer_at_entering = []
        self.best_offer_broker_at_entering = []
        self.best_bid_at_entering = []
        self.best_bid_broker_at_entering = []

        self.best_offer_at_exiting = []
        self.best_offer_broker_at_exiting = []
        self.best_bid_at_exiting = []
        self.best_bid_broker_at_exiting = []
        self.timestamp_at_exiting = []

        self.position_outcome = []
        self.position_profit = []

    def initialize_summary_df(self, simulation_type):
        """Clear the collected positions for an ``"increase"`` simulation.

        Any other ``simulation_type`` is ignored and leaves the current
        summary lists untouched.
        """
        if simulation_type == "increase":
            self._reset_summary_lists()

    def _column_position(self, column_name):
        """Return the position of ``column_name`` in ``self.df_col_list``.

        Positions are cached in a dict so that per-row reads do not scan the
        column list again. The first occurrence of a name wins and a missing
        name raises ValueError, matching ``list.index``.
        """
        cached = getattr(self, "_column_positions", None)
        if cached is None or cached[0] is not self.df_col_list:
            positions = {}
            for position, name in enumerate(self.df_col_list):
                positions.setdefault(name, position)
            cached = (self.df_col_list, positions)
            self._column_positions = cached
        try:
            return cached[1][column_name]
        except KeyError:
            raise ValueError(f"{column_name!r} is not in list") from None

    def _value(self, market_row, column_name):
        """Read the value stored under ``column_name`` in one market row."""
        return market_row[self._column_position(column_name)]

    def prepaer_data_for_simulation(self, df):
        """Add the best hit-broker columns to ``df`` and convert it to numpy.

        ``df`` is modified in place: ``best_offer_hit_rate``,
        ``best_offer_hit_broker``, ``best_bid_hit_rate`` and
        ``best_bid_hit_broker`` are added. Rows are not filtered by broker and
        no columns are dropped. The ``*_broker`` columns hold the name of the
        rate column that supplied the best value (for example
        ``"FCI_offer_0_rate"``), not the bare broker prefix.

        Returns:
            tuple ``(column_names, data, number_of_rows)`` where
            ``column_names`` is the list of column names of the extended
            frame, ``data`` is a copy of the frame as a numpy array and
            ``number_of_rows`` is the number of rows minus one.
        """
        offer_hit_column_name = [f"{broker_name}_offer_0_rate" for broker_name in self.hit_brokers]
        bid_hit_column_name = [f"{broker_name}_bid_0_rate" for broker_name in self.hit_brokers]

        # best offer = lowest offer rate, best bid = highest bid rate
        df['best_offer_hit_rate'] = df[offer_hit_column_name].min(axis=1, skipna=True)
        df['best_offer_hit_broker'] = df[offer_hit_column_name].idxmin(axis=1, skipna=True)
        df['best_bid_hit_rate'] = df[bid_hit_column_name].max(axis=1, skipna=True)
        df['best_bid_hit_broker'] = df[bid_hit_column_name].idxmax(axis=1, skipna=True)

        # convert to numpy
        self.df_col_list = list(df.columns.values)
        self._column_positions = None  # rebuilt lazily for the new layout
        df_np = df.to_numpy().copy()
        # NOTE(review): one less than the row count, so loops bounded by this
        # value never reach the last row -- confirm this is intended.
        number_of_rows = df.shape[0] - 1
        return self.df_col_list, df_np, number_of_rows

    def spread_tag_inc_entering_condition(self, current_market_data, offer_broker, bid_broker, threshold_value):
        """Evaluate the entry rule on one market row.

        There is a signal when the ``<bid_broker>_bid_0`` value of the row is
        truthy and ``<offer_broker>_offer_0_rate - <bid_broker>_bid_0_rate``
        is less than or equal to ``threshold_value``.

        Returns:
            ``(True, spread)`` when there is a signal, otherwise ``(False, None)``.
        """
        # NOTE(review): a NaN flag is truthy in Python -- confirm the invoker
        # column never holds NaN.
        if not self._value(current_market_data, f"{bid_broker}_bid_0"):
            return False, None

        offer_rate = self._value(current_market_data, f"{offer_broker}_offer_0_rate")
        bid_rate = self._value(current_market_data, f"{bid_broker}_bid_0_rate")
        spread_tag_inc = offer_rate - bid_rate
        if spread_tag_inc <= threshold_value:
            return True, spread_tag_inc
        return False, None

    def find_bests_for_hit_brokers(self, current_market_data):
        """Return ``(best_offer, best_offer_broker, best_bid, best_bid_broker)`` of a row."""
        best_offer = self._value(current_market_data, "best_offer_hit_rate")
        best_offer_broker = self._value(current_market_data, "best_offer_hit_broker")
        best_bid = self._value(current_market_data, "best_bid_hit_rate")
        best_bid_broker = self._value(current_market_data, "best_bid_hit_broker")
        return best_offer, best_offer_broker, best_bid, best_bid_broker

    def check_exit_limit_condition_inc(self, current_market_data, upper_limit_value, lower_limit_value):
        """Return True when the row's best bid is at or above ``upper_limit_value``
        or strictly below ``lower_limit_value``; otherwise return False."""
        best_bid = self._value(current_market_data, "best_bid_hit_rate")
        if best_bid >= upper_limit_value or best_bid < lower_limit_value:
            return True
        return False

    def documnet_position(self, market_data_at_entering, market_data_at_exiting, signal_value):
        """Append one position to the summary lists.

        The outcome is the best bid of the exit row minus the best offer of
        the entry row; the profit is the outcome minus ``positon_fee``. Both
        are rounded to 6 decimals.
        """
        def entering(column_name):
            return self._value(market_data_at_entering, column_name)

        def exiting(column_name):
            return self._value(market_data_at_exiting, column_name)

        self.timestamp_at_entering.append(entering("timestamp"))
        self.signal_value.append(signal_value)

        self.hour_at_entering.append(entering("hour"))
        self.best_offer_at_entering.append(entering("best_offer_hit_rate"))
        self.best_offer_broker_at_entering.append(entering("best_offer_hit_broker"))
        self.best_bid_at_entering.append(entering("best_bid_hit_rate"))
        self.best_bid_broker_at_entering.append(entering("best_bid_hit_broker"))

        self.best_offer_at_exiting.append(exiting("best_offer_hit_rate"))
        self.best_offer_broker_at_exiting.append(exiting("best_offer_hit_broker"))
        self.best_bid_at_exiting.append(exiting("best_bid_hit_rate"))
        self.best_bid_broker_at_exiting.append(exiting("best_bid_hit_broker"))
        self.timestamp_at_exiting.append(exiting("timestamp"))

        position_outcome = exiting("best_bid_hit_rate") - entering("best_offer_hit_rate")

        self.position_outcome.append(np.round(position_outcome, 6))
        self.position_profit.append(np.round(position_outcome - self.positon_fee, 6))

    def create_simulation_summary(self):
        """Build a DataFrame with one row per documented position."""
        summary_dict = {
            "signal_value": self.signal_value,
            "timestamp_at_entering": self.timestamp_at_entering,
            "hour_at_entering": self.hour_at_entering,
            "best_offer_at_entering": self.best_offer_at_entering,
            "best_offer_broker_at_entering": self.best_offer_broker_at_entering,
            "best_bid_at_entering": self.best_bid_at_entering,
            "best_bid_broker_at_entering": self.best_bid_broker_at_entering,
            "best_offer_at_exiting": self.best_offer_at_exiting,
            "best_offer_broker_at_exiting": self.best_offer_broker_at_exiting,
            "best_bid_at_exiting": self.best_bid_at_exiting,
            "best_bid_broker_at_exiting": self.best_bid_broker_at_exiting,
            "timestamp_at_exiting": self.timestamp_at_exiting,
            "position_outcome": self.position_outcome,
            "position_profit": self.position_profit
        }
        summary_df = pd.DataFrame.from_dict(summary_dict)
        return summary_df

In [6]:
# Settings of arena 1 / simulation 1, collected in one place.
arena_1_sim_1_settings = {
    "simulation_name": "arena 1 simulation 1",
    "hit_brokers": ['FCI', 'IBI', 'TA1', 'IDB'],
    "view_brokers": [],
    "enter_conditions": "spread_tag_inc(TA1_offer, IBI_bid) < 0",
    "exit_conditions": "current_best_bid >= best_offer_at_signal + 0.00005 or current_best_bid < best_bid_at_signal - 0.00005",
    "positon_fee": 0.00001,
    "max_time_in_position": False,
}
arena_1_sim_1 = RunSimulation(**arena_1_sim_1_settings)

# Start from an empty summary for an "increase" simulation.
arena_1_sim_1.initialize_summary_df("increase")

# Prepare the data on a copy, so merged_data itself does not receive the
# best-rate columns added during preparation.
arena_1_sim_1_df = merged_data.copy()
(
    sim_df_col_list,
    sim_df_np,
    sim_number_of_rows,
) = arena_1_sim_1.prepaer_data_for_simulation(arena_1_sim_1_df)

In [7]:
# Every row is tested as a possible entry, so positions may overlap in time.
# For each entry the following rows are scanned and the position is recorded
# at the FIRST row whose best bid leaves the band
# [best_bid_at_entry - 0.00005, best_offer_at_entry + 0.00005).
# The previous version tested the exit condition on the entry row itself and
# never advanced temp_index, so it either recorded nothing or looped forever.
#
# This loop appends to the summary lists of arena_1_sim_1; call
# arena_1_sim_1.initialize_summary_df("increase") before running another
# simulation loop on the same object, otherwise the results accumulate.
#
# NOTE(review): an earlier run of this cell raised
# "TypeError: unsupported operand type(s) for -: 'str' and 'float'", which
# suggests one of the rate values read below is a string -- confirm the dtype
# of the rate columns before relying on the results.

# Column positions are loop-invariant, so look them up once.
ibi_bid_invoker_position = sim_df_col_list.index("IBI_bid_0")
ta1_offer_rate_position = sim_df_col_list.index("TA1_offer_0_rate")
ibi_bid_rate_position = sim_df_col_list.index("IBI_bid_0_rate")
best_bid_rate_position = sim_df_col_list.index("best_bid_hit_rate")

for i in range(sim_number_of_rows):
    # current market row
    current_market_data = sim_df_np[i]
    # check if entering condition holds
    if current_market_data[ibi_bid_invoker_position]:
        spread_tag_inc = current_market_data[ta1_offer_rate_position] - current_market_data[ibi_bid_rate_position]
        if spread_tag_inc <= 0.0:
            best_offer_value, best_offer_broker, best_bid_value, best_bid_broker = arena_1_sim_1.find_bests_for_hit_brokers(current_market_data)
            upper_limit_value = best_offer_value + 0.00005
            lower_limit_value = best_bid_value - 0.00005
            # scan forward for the first row that satisfies the exit condition
            for temp_index in range(i + 1, sim_number_of_rows):
                exit_best_bid = sim_df_np[temp_index][best_bid_rate_position]
                if exit_best_bid >= upper_limit_value or exit_best_bid < lower_limit_value:
                    arena_1_sim_1.documnet_position(current_market_data, sim_df_np[temp_index], spread_tag_inc)
                    break

TypeError: unsupported operand type(s) for -: 'str' and 'float'

In [None]:
# Sequential position scan: look for an entry signal at running_index, then
# scan the following rows for the first one that satisfies the exit condition,
# record the position and resume the search on the row after the exit.
#
# NOTE(review): within the lines visible here, running_index is only advanced
# after an exit row has been found. A row without a signal, or a signal whose
# exit is never found before sim_number_of_rows, leaves running_index
# unchanged, so the outer loop would not terminate -- confirm that the rest of
# the cell (if any) advances running_index on those paths.
running_index = 0
while running_index < sim_number_of_rows:
    # current market row
    current_market_data = sim_df_np[running_index]
    # check if entering condition holds
    is_there_signal, spread_tag_inc_value = arena_1_sim_1.spread_tag_inc_entering_condition(current_market_data, "TA1", "IBI", 0.0)
    if is_there_signal:
        # prints the row index of every entry signal
        print(running_index)
        best_offer_value, best_offer_broker, best_bid_value, best_bid_broker = arena_1_sim_1.find_bests_for_hit_brokers(current_market_data)
        temp_index = running_index + 1
        while temp_index < sim_number_of_rows:
            # exit when the best bid reaches best offer at entry + 0.00005
            # or drops below best bid at entry - 0.00005
            if arena_1_sim_1.check_exit_limit_condition_inc(sim_df_np[temp_index], best_offer_value + 0.00005, best_bid_value - 0.00005):
                arena_1_sim_1.documnet_position(current_market_data, sim_df_np[temp_index], spread_tag_inc_value)
                # continue searching for the next entry after the exit row
                running_index = temp_index + 1
                break
            else:
                temp_index += 1