## Deribit market agent notebook v0.01

Imports

In [9]:
import torch
from torch import nn
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from pathlib import Path
from collections import deque
import random, time, datetime, os, copy
import datetime as dt
from api_client import Client
from historical_data import get_historical_data
from metric_logger import MetricLogger
import nest_asyncio
from IPython.display import clear_output


Environment loading

In [10]:
#initialise trading client
client = Client()
nest_asyncio.apply()

エージェント neural net

In [11]:
class SelectItem(nn.Module):
    """
    for picking out an element from a tuple/list
    at index item_index, for any layer which outputs such data
    """
    def __init__(self, item_index):
        super().__init__()
        self._name = 'selectitem'
        self.item_index = item_index

    def forward(self, inputs):
        x, _ = inputs
        return x


class Agent47Net(nn.Module):
    def __init__(self, input_dim, output_dim, batch_size):
        super().__init__()
        
        self.online = nn.Sequential(
            nn.Conv2d(in_channels=input_dim, out_channels=100, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=100, out_channels=50, kernel_size=2, stride=2, padding=2),
            nn.ReLU(),
            nn.Flatten(0,1),
            nn.LSTM(input_size=3, num_layers=3, hidden_size=64),
            SelectItem(1),
            nn.Flatten(0,2),
            #nn.Linear(86400, output_dim)
        )
        
        #frozen target Q
        self.target = copy.deepcopy(self.online)
        
        for p in self.target.parameters():
            p.requires_grad = False   
        
    
    def forward(self, inputs, model):
        inputs = inputs.float()
        if model == 'online':
            return self.online(inputs)
        elif model == 'target':
            return self.target(inputs)
    


エージェント

In [12]:
class Agent47:
    def __init__(self, state_dim, action_dim, save_dir):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.save_dir = save_dir
        
        self.exploration_rate = 1
        self.decay_rate = 0.99975
        self.min_rate = 0.1
        self.curr_step = 0
        
        self.save_every = 100
        
        self.memory = deque(maxlen=10000)
        self.batch_size = 3
        
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
        self.net = Agent47Net(self.state_dim, self.action_dim, self.batch_size)
        self.net = self.net.to(device)
        
        self.loss_fn = torch.nn.SmoothL1Loss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=0.00025)
        
        self.learn_every = 1  # no. of experiences between updates to Q_online
        self.sync_every = 100  # no. of experiences between Q_target & Q_online sync
        
        self.gamma = 0.9

    def act(self, state):
        """
        Return the action to do at given state
        """
        if np.random.rand() < self.exploration_rate:
            action_id = np.random.randint(self.action_dim)
        else:
            state = state.__array__() 
            state = torch.tensor(state).cuda()
            state = state.unsqueeze(0)
            action_vals = self.net(state, model="online") #action scores from NN
            print(action_vals.shape)
            action_id = torch.argmax(action_vals, axis=0).item() #value of action with highest score
        
        if self.exploration_rate >= self.min_rate:
            self.exploration_rate -= 1-self.decay_rate #linear decay
        
        self.curr_step += 1
        return action_id #action to take at this step
    
    def cache(self, state, next_state, action, reward):
        """
        Store experience to memory
        """
        state = state.__array__()
        next_state = next_state.__array__()
        
        state = torch.tensor(state).cuda()
        next_state = torch.tensor(next_state).cuda()
        action = torch.tensor([action]).cuda()
        reward = torch.tensor([reward]).cuda()

        self.memory.append((state, next_state, action, reward))
    
    def recall(self):
        """
        Retrieve/"remember" experiences
        """
        batch = random.sample(self.memory, self.batch_size)
        state, next_state, action, reward = map(torch.stack, zip(*batch))
        return state, next_state, action.squeeze(), reward.squeeze()       
    
    def td_estimate(self, state, action):
        """
        temporal difference Q estimate at current state
        """
        current_Q = self.net(state, model="online")[action]
        return current_Q

    def td_target(self, reward, next_state):
        """
        get td target
        """
        next_state_Q = self.net(next_state, model="online")
        best_action = torch.argmax(next_state_Q, axis=0)
        next_Q = self.net(next_state, model="target")[best_action]
        return (reward + (self.gamma * next_Q).float())     
    
    def update_Q(self, td_estimate, td_target):
        """
        update parameters
        """
        loss = self.loss_fn(td_estimate, td_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()
    
    def sync_Q_target(self):
        """
        sync Q target with online periodically instead of backpropagation
        """
        self.net.target.load_state_dict(self.net.online.state_dict())

    def save(self):
        """
        save model in directory
        """
        save_path = (
            self.save_dir / f"agent47_net_{int(self.curr_step // self.save_every)}.chkpt"
        )
        torch.save(
            dict(model=self.net.state_dict(), exploration_rate=self.exploration_rate),
            save_path,
        )
        print(f"Agent47 saved to {save_path} at step {self.curr_step}")
        
    def learn(self):
        """
        Learning steps
        """
        if self.curr_step % self.sync_every == 0:
            self.sync_Q_target()

        if self.curr_step % self.save_every == 0:
            self.save()
            
        if self.curr_step < 1 * self.batch_size:
            return None, None
        
        if self.curr_step % self.learn_every != 0:
            return None, None

        # Sample from memory
        state, next_state, action, reward = self.recall()

        # Get TD Estimate
        td_est = self.td_estimate(state, action)

        # Get TD Target
        td_tgt = self.td_target(reward, next_state)

        # Backpropagate loss through Q_online 
        loss = self.update_Q(td_est, td_tgt)

        return (td_est.mean().item(), loss) #(estimate of Q, loss)

### Training

In [13]:
t = torch.cuda.get_device_properties(0).total_memory
print("GPU available:", torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print("Device total memory: {} GB".format(round(t/1024**3,2)))

GPU available: True
NVIDIA GeForce RTX 3080
Device total memory: 9.75 GB


In [14]:
def get_reward(equity1):
    """
    return reward value
    """
    summary = client.get_account_summary()
    equity2 = float(summary["equity"])
    return (equity2-equity1)
    
def get_state():
    """
    get state at time of function call
    currently only order book
    """
    curr_time = round(time.time())
    now = dt.datetime.now().strftime('%B %d, %Y %H:%M:%S')
    tick_data = get_historical_data(t1=curr_time-120, t2=curr_time)
    order_book = client.get_order_book(instrument='BTC-PERPETUAL', depth=50)
    bids = order_book['bids']
    asks = order_book['asks']
    return [bids, asks]

def print_memory_usage(current_actions, episode):
    """
    print GPU memory usage by CUDA
    """
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    f = r-a  # free inside reserved
    clear_output(wait=True)
    print(torch.cuda.get_device_name(0))
    print("Reserved memory: {} GB".format(round(r/1024**3,3)))
    print("Allocated memory: {} GB".format(round(a/1024**3,3)))
    print("Amount free in reserved: {} GB".format(round(f/1024**3,3)))
    print("Action {} in episode {}".format(current_actions, episode))

def print_state(action, reward, q, loss, state):
    print("action: ", action)
    print("reward: ", reward)
    print("estimated q: ", q)
    print("loss: ", loss)
    print("state/orderbook: ", state)



Model visualisation

In [15]:
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
from torchsummary import summary

state = np.array(get_state(), dtype="float64").T
state = state.__array__() 
state = torch.tensor(state).cuda()
state = state.unsqueeze(0)
test_net = Agent47Net(2,6,32)
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
test_net.to(device)
output = test_net(state, model="online")
summary(test_net)
summary(test_net, state, "online")
print(test_net)
print(output.shape)

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       900
|    └─ReLU: 2-2                         --
|    └─Conv2d: 2-3                       20,050
|    └─ReLU: 2-4                         --
|    └─Flatten: 2-5                      --
|    └─LSTM: 2-6                         84,224
|    └─SelectItem: 2-7                   --
|    └─Flatten: 2-8                      --
├─Sequential: 1-2                        --
|    └─Conv2d: 2-9                       (900)
|    └─ReLU: 2-10                        --
|    └─Conv2d: 2-11                      (20,050)
|    └─ReLU: 2-12                        --
|    └─Flatten: 2-13                     --
|    └─LSTM: 2-14                        (84,224)
|    └─SelectItem: 2-15                  --
|    └─Flatten: 2-16                     --
Total params: 210,348
Trainable params: 105,174
Non-trainable params: 105,174
Layer (type:depth-idx)                   Output Shape    

Playing the game

In [None]:
from functools import partial

save_dir = Path("checkpoints") 
agent = Agent47(state_dim=2, action_dim=6, save_dir=save_dir)
logger = MetricLogger(save_dir)
actions = [partial(client.order, instrument_name="BTC-PERPETUAL", side="long", amount=500, order_type="market"), 
           partial(client.order, instrument_name="BTC-PERPETUAL", side="short", amount=500, order_type="market"),
           partial(client.make_futures_order, side="long", instrument="BTC-PERPETUAL", amount=500),
           partial(client.make_futures_order, side="short", instrument="BTC-PERPETUAL", amount=500),
           client.cancel_all_orders,
           client.do_nothing
          ]

episodes = 1
action_num = 1000 #1000 actions in an episode

for e in range(episodes): 
    #play the game:
    step = 0
    
    state = np.array(get_state(), dtype="float64").T
    #get state from deribit
    equity = client.get_account_summary()["equity"]
    equity_list = [0]*200

    while step < action_num:
        
        #agent runs on the state
        action = agent.act(state) #action id
        try:
            actions[action]()
        except:
            print("Empty or error, continuing until result")
            continue
        try:
            next_state = np.array(get_state(), dtype="float64").T
        except:
            print("not enough order book values (probably). Continuing")
            continue
            
        reward = get_reward(equity) #reward calculated as total equity increase over episode
        agent.cache(state, next_state, action, reward)
        q, loss = agent.learn()
        logger.log_step(reward, loss, q, action, step)
        
        state = next_state
        
        step += 1
        print_state(action, reward, q, loss, state)
        print_memory_usage(step, e + 1)
        if agent.batch_size % step == 1 and step != 2:
            break
    

    logger.log_episode()
    logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)


NVIDIA GeForce RTX 3080
Reserved memory: 0.08 GB
Allocated memory: 0.009 GB
Amount free in reserved: 0.071 GB
Action 12 in episode 1
{"jsonrpc":"2.0","id":0,"result":{"trades":[{"trade_seq":56097574,"trade_id":"83167346","timestamp":1633239477398,"tick_direction":2,"state":"filled","self_trade":false,"reduce_only":false,"profit_loss":7.6e-7,"price":48106.5,"post_only":false,"order_type":"market","order_id":"6668772211","matching_id":null,"mark_price":48079.6,"liquidity":"T","label":"null","instrument_name":"BTC-PERPETUAL","index_price":48067.45,"fee_currency":"BTC","fee":0.00000104,"direction":"sell","amount":100.0},{"trade_seq":56097575,"trade_id":"83167347","timestamp":1633239477398,"tick_direction":2,"state":"filled","self_trade":false,"reduce_only":false,"profit_loss":0.00000124,"price":48102.5,"post_only":false,"order_type":"market","order_id":"6668772211","matching_id":null,"mark_price":48079.6,"liquidity":"T","label":"null","instrument_name":"BTC-PERPETUAL","index_price":48067.4

Task exception was never retrieved
future: <Task finished name='Task-460' coro=<call_api_private() done, defined at /home/saze/Documents/machine_learning/market_agent/call_api.py:16> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "<ipython-input-16-786b4849f4ed>", line 31, in <module>
    actions[action]()
  File "/home/saze/Documents/machine_learning/market_agent/api_client.py", line 99, in cancel_all_orders
    result = asyncio.get_event_loop().run_until_complete(call_api_private(msg=json.dumps(self.msg), auth_msg=json.dumps(self.auth_msg)))
  File "/home/saze/anaconda3/lib/python3.8/site-packages/nest_asyncio.py", line 90, in run_until_complete
    self._run_once()
  File "/home/saze/anaconda3/lib/python3.8/site-packages/nest_asyncio.py", line 127, in _run_once
    handle._run()
  File "/home/saze/anaconda3/lib/python3.8/site-packages/nest_asyncio.py", line 196, in run
    ctx.run(self._callback, *self._args)
  File "/home/saze/anaconda3/lib/python3.8/async

Empty or error, continuing until result
Empty or error, continuing until result
Empty or error, continuing until result
     volume         ticks status     open      low     high     cost    close  \
0  0.370511  1.633239e+09     ok  48094.0  48084.0  48099.5  17820.0  48084.0   
1  0.316180  1.633239e+09     ok  48073.0  48065.0  48108.0  15200.0  48098.5   

            timestamp  
0 2021-10-03 05:36:00  
1 2021-10-03 05:37:00  
Empty or error, continuing until result
{"jsonrpc":"2.0","id":0,"result":{"trades":[{"trade_seq":56097584,"trade_id":"83167367","timestamp":1633239491124,"tick_direction":1,"state":"filled","self_trade":false,"reduce_only":false,"profit_loss":0.0,"price":48107.5,"post_only":false,"order_type":"market","order_id":"6668772650","matching_id":null,"mark_price":48103.29,"liquidity":"T","label":"null","instrument_name":"BTC-PERPETUAL","index_price":48084.21,"fee_currency":"BTC","fee":0.0000052,"direction":"buy","amount":500.0}],"order":{"web":false,"time_in_force"