In [1]:
import pickle
import random

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import torch
from plotly.subplots import make_subplots

from simulator.simulator import Sim
from strategies.rl import A2CNetwork, Policy, RLStrategy, A2C, ComputeValueTargets, evaluate
from utils.get_info import get_pnl, get_volumes
from utils.load_data import load_md_from_file

ModuleNotFoundError: No module named 'simulator.simulator'; 'simulator' is not a package

In [2]:
# Market-data directory for the run (the BTC perpetual path is kept for reference).
# PATH_TO_FILE = 'md/btcusdt:Binance:LinearPerpetual/'
PATH_TO_FILE = 'md_new/ethusdt/'

# Seed every RNG used by the notebook so that runs are repeatable.
seed = 13
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(device_name)
DEVICE

device(type='cpu')

# Training

In [3]:
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only load feature dumps that were produced by this project.
with open('data/features_dict_eth.pickle', 'rb') as features_file:
    ess_dict = pickle.load(features_file)

# One row per dictionary key; the key ends up in the 'receive_ts' column.
ess_df = (
    pd.DataFrame.from_dict(ess_dict, orient='index')
    .reset_index()
    .rename(columns={'index': 'receive_ts'})
)

# Arrays handed to RLStrategy as `means` / `stds`
# (presumably per-feature normalisation statistics — confirm against RLStrategy).
with open('data/means_eth.npy', 'rb') as means_file, open('data/stds_eth.npy', 'rb') as stds_file:
    means = np.load(means_file)
    stds = np.load(stds_file)

# The raw dictionary is no longer needed once the frame has been built.
del ess_dict

In [4]:
# Read the market data from PATH_TO_FILE, capped at ten million rows.
# NOTE(review): `btc=False` presumably selects the non-BTC file layout — confirm in utils.load_data.
md_all = load_md_from_file(
    path=PATH_TO_FILE,
    nrows=10_000_000,
    btc=False,
)

In [5]:
# Larger split kept for reference:
# md_train = md_all[:1_035_000]
# md_test = md_all[1_035_000:1_080_000]

# Contiguous train / validation windows; validation starts where training ends.
TRAIN_END = 200_000
TEST_END = 230_000

# NOTE(review): the training window starts at index 1 (element 0 is skipped),
# unlike the commented-out split above — confirm this is intentional.
md_train = md_all[1:TRAIN_END]
md_test = md_all[TRAIN_END:TEST_END]

In [10]:
# Build a freshly initialised 10-action network and place it on DEVICE.
model = A2CNetwork(n_actions=10, DEVICE=DEVICE)
model = model.to(DEVICE)

# Uncomment to warm-start from a previously saved checkpoint.
# model.load_state_dict(torch.load("models/ETH_50step_15_50act_rew_pen.pth"))

# The policy wraps the network for use by the strategy and the A2C trainer.
policy = Policy(model)

In [11]:
# Strategy timings as integer nanoseconds.
# `Timedelta.value` is used instead of `Timedelta.delta`: `.delta` was deprecated
# in pandas 1.5 and removed in pandas 2.0, while `.value` returns the same
# nanosecond count on every pandas version.
delay = pd.Timedelta(0.1, 's').value
hold_time = pd.Timedelta(10, 's').value

# Trading strategy driven by `policy`.
# NOTE(review): the meaning of the positional `1.0` is not visible here — confirm
# against RLStrategy's signature. A negative `maker_fee` presumably denotes a rebate.
strategy = RLStrategy(policy, ess_df, 1.0, means, stds, delay, hold_time, [ComputeValueTargets(policy)],
                      trade_size=0.01, post_only=True, taker_fee=0.0004, maker_fee=-0.00004)

# RMSprop over all network parameters.
optimizer = torch.optim.RMSprop(model.parameters(), lr=7e-4, alpha=0.99, eps=1e-5)

# A2C trainer combining policy, value (weight 0.25) and entropy (weight 1) terms.
a2c = A2C(policy, optimizer, value_loss_coef=0.25, entropy_coef=1, DEVICE=DEVICE)

In [12]:
# One-off environment setup (kept disabled):
# !pip install wandb --upgrade --quiet
# !wandb login --relogin

import wandb

# Experiment tracking: authenticate, open a run in the "MM" project, then hook
# `model` so that wandb can track it during training. The calls must stay in
# this order. The trailing `;` on the last line suppresses the cell's output repr.
wandb.login()
wandb.init(project="MM") # id="9dee5ngv", resume=True
wandb.watch(model);



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
critic loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
entropy loss,▂▁▃▁▁█▁▂▁▁▁▁▃▁▁▁▁▂▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
policy loss,▁▁▅▁▁█▁▁▁▁▁▁▆▁▁▁▁▂▁▁▁▁▁▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
total loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train reward,█▅██▅▄█▄████▇██▇▂▆▄▂▅▂▃▅▃█▅▂▄█▄▅▂▁▄▄▃▄▂▂

0,1
critic loss,1064.45398
entropy loss,-0.0
policy loss,-0.0
total loss,266.11349
train reward,-0.17397


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016753071533332786, max=1.0…

In [13]:
from tqdm.notebook import trange

# Training schedule.
# The original condition `i % 500 == 0` could never be true for i in 1..300, so
# validation, wandb logging and checkpointing were silently skipped and the
# checkpoint file was never written. EVAL_EVERY now fits inside N_ITERS and the
# final iteration is always validated and checkpointed.
N_ITERS = 300
EVAL_EVERY = 100
CHECKPOINT_PATH = "models/ETH_50step_15_50act_rew_pen.pth"

# Simulated execution / market-data latency as integer nanoseconds
# (`Timedelta.value` replaces `Timedelta.delta`, which was removed in pandas 2.0).
latency_ns = pd.Timedelta(10, 'ms').value

for i in trange(1, N_ITERS + 1):
    a2c.train(strategy, md_train,
              latency=latency_ns,
              md_latency=latency_ns,
              count=50,
              train_slice=195_000)
    if i % EVAL_EVERY == 0 or i == N_ITERS:
        # Periodic validation on the held-out window.
        reward, pnl, trajectory = evaluate(strategy,
                                           md_test,
                                           latency=latency_ns,
                                           md_latency=latency_ns)
        wandb.log({
            'val reward': reward,
            'val pnl': pnl,
        })

        # Persist the current weights so the evaluation section can reload them.
        torch.save(model.state_dict(), CHECKPOINT_PATH)

        # Optional entropy annealing (disabled):
        # a2c.entropy_coef /= 10

  0%|          | 0/300 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [44]:
# Rebuild the network exactly as in the training section (same `DEVICE` keyword)
# and restore the saved weights. `map_location=DEVICE` lets a checkpoint written
# on a GPU machine load on a CPU-only host, and vice versa.
model = A2CNetwork(n_actions=10, DEVICE=DEVICE).to(DEVICE)
model.load_state_dict(torch.load("models/ETH_50step_15_50act_rew_pen.pth", map_location=DEVICE))
model.eval()  # inference mode for evaluation

policy = Policy(model)

# Strategy timings as integer nanoseconds (`Timedelta.value` replaces
# `Timedelta.delta`, which was deprecated in pandas 1.5 and removed in 2.0).
delay = pd.Timedelta(0.1, 's').value
hold_time = pd.Timedelta(10, 's').value

# Same strategy configuration as used for training, now bound to the reloaded policy.
strategy = RLStrategy(policy, ess_df, 1.0,
                      means, stds, delay, hold_time, [ComputeValueTargets(policy)],
                      trade_size=0.01, post_only=True, taker_fee=0.0004, maker_fee=-0.00004)

In [45]:
%%time
# Start from a clean strategy state before the out-of-sample run.
strategy.reset()

# Latency as integer nanoseconds (`Timedelta.value` replaces `Timedelta.delta`,
# which was deprecated in pandas 1.5 and removed in 2.0).
latency_ns = pd.Timedelta(10, 'ms').value

# Simulate on a window that follows the training and validation slices.
sim = Sim(md_all[230_000:260_000],
          execution_latency=latency_ns,
          md_latency=latency_ns)

# No gradients are needed while only running the policy.
with torch.no_grad():
    trades_list, md_list, updates_list, actions_history, trajectory = strategy.run(sim, mode='test')

CPU times: user 2min 8s, sys: 596 ms, total: 2min 8s
Wall time: 2min 9s


In [64]:
# Disabled scratch code: pickle dump / reload of the simulation outputs
# (trades_list, md_list, updates_list, actions_history).
# NOTE(review): the dump file names carry a `_BTC` suffix although this notebook
# runs on ETH data — confirm the intended file names before re-enabling.
# with open('../data/rl_model/trades_list_BTC.pickle', 'wb') as f:
#     pickle.dump(trades_list, f)
    
# with open('../data/rl_model/md_list_BTC.pickle', 'wb') as f:
#     pickle.dump(md_list, f)
    
# with open('../data/rl_model/updates_list_BTC.pickle', 'wb') as f:
#     pickle.dump(updates_list, f)
    
# with open('../data/rl_model/actions_history_BTC.pickle', 'wb') as f:
#     pickle.dump(actions_history, f)
    
# # with open('../data/rl_model/trades_list.pickle', 'rb') as f:
# #     trades_list = pickle.load(f)
    
# # with open('../data/rl_model/md_list.pickle', 'rb') as f:
# #     md_list = pickle.load(f)
    
# # with open('../data/rl_model/updates_list.pickle', 'rb') as f:
# #     updates_list = pickle.load(f)
    
# # with open('../data/rl_model/actions_history.pickle', 'rb') as f:
# #     actions_history = pickle.load(f)

In [46]:
%%time

# PnL path computed with no fee arguments (plotted later as "PnL without fees");
# 'receive_ts' is converted to pandas datetimes for use on the time axis.
df = get_pnl(updates_list, post_only=True)
df = df.assign(receive_ts=pd.to_datetime(df['receive_ts']))

CPU times: user 2.36 s, sys: 4.16 ms, total: 2.36 s
Wall time: 2.36 s


In [47]:
%%time

# PnL path with the same maker / taker fees the strategy was configured with;
# 'receive_ts' is converted to pandas datetimes for use on the time axis.
df_fee = get_pnl(updates_list, post_only=True, maker_fee=-0.00004, taker_fee=0.0004)
df_fee = df_fee.assign(receive_ts=pd.to_datetime(df_fee['receive_ts']))

CPU times: user 2.41 s, sys: 3.16 ms, total: 2.41 s
Wall time: 2.42 s


In [48]:
# Four volume totals derived from the executed trades.
# NOTE(review): names suggest ask/bid side split by maker ("made") vs taker ("take")
# fills — confirm against utils.get_info.get_volumes.
volumes = get_volumes(trades_list)
ask_made, bid_made, ask_take, bid_take = volumes
(ask_made, bid_made, ask_take, bid_take)

(9.729999999999837, 2.439999999999992, 1.2300000000000009, 0.02)

In [49]:
# Three stacked panels: mid price, PnL (with and without fees) and inventory.
# Every PLOT_STEP-th row is plotted to keep the figure light.
PLOT_STEP = 100
pnl_plot = df.iloc[::PLOT_STEP, :]
pnl_fee_plot = df_fee.iloc[::PLOT_STEP, :]

fig = make_subplots(rows=3, cols=1, subplot_titles=("Price", "PnL", "Inventory Size"))

fig.add_trace(go.Scatter(x=pnl_plot['receive_ts'], y=pnl_plot['mid_price'],
                         name='Price'), row=1, col=1)

fig.add_trace(go.Scatter(x=pnl_plot['receive_ts'], y=pnl_plot['total'],
                         name='PnL without fees'), row=2, col=1)
fig.add_trace(go.Scatter(x=pnl_fee_plot['receive_ts'], y=pnl_fee_plot['total'],
                         name='PnL including fees'), row=2, col=1)

# The inventory column returned by get_pnl is named 'BTC' even though this run
# uses ETH market data (presumably a hard-coded base-asset column — confirm in
# utils.get_info), so only the axis label below is corrected to ETH.
fig.add_trace(go.Scatter(x=pnl_plot['receive_ts'], y=pnl_plot['BTC'],
                         name='Inventory Size'), row=3, col=1)

# Optional fourth panel with the chosen actions (disabled; needs rows=4 above):
# fig.add_trace(go.Scatter(x=pd.to_datetime(actions[0])[::100], y=actions[2][::100], mode='markers',
#                          marker_color=actions[2][::100],
#                          name='Actions'), row=4, col=1)

fig.update_yaxes(title_text="USD", row=1, col=1)
fig.update_yaxes(title_text="USD", row=2, col=1)
fig.update_yaxes(title_text="ETH", row=3, col=1)
# fig.update_yaxes(title_text="Actions ID", row=4, col=1)

# update_layout returns the figure, so as the cell's last expression it also renders it.
fig.update_layout(title_text="RL Strategy: maker fee = -0.004%", height=700)

# fig.write_html('../docs/RLStrategy.html')
# fig.write_image('../images/results/RLStrategy.jpeg')
# fig.show()

In [50]:
# Histogram of the action ids chosen during the test run. Ids 0..10 are
# pre-seeded so that actions that were never chosen still show a zero count.
actions = dict.fromkeys(range(11), 0)
for record in actions_history:
    _, _, action_id = record
    actions[action_id] += 1

In [51]:
# Show how many times each action id was selected.
actions

{0: 0, 1: 1, 2: 735, 3: 4, 4: 83, 5: 86, 6: 48, 7: 3, 8: 0, 9: 9842, 10: 0}