## Deribit market agent notebook v0.01

Imports

In [103]:
import torch
from torch import nn
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from pathlib import Path
from collections import deque
import random, time, datetime, os, copy
import datetime as dt
from api_client import Client
from historical_data import get_historical_data
from metric_logger import MetricLogger
import nest_asyncio
from IPython.display import clear_output

Environment loading

In [104]:
#initialise trading client
client = Client()
nest_asyncio.apply()

エージェント neural net

In [105]:
# class SelectItem(nn.Module):
#     """
#     for picking out an element from a tuple/list
#     at index item_index, for any layer which outputs such data
#     """
#     def __init__(self, item_index):
#         super().__init__()
#         self._name = 'selectitem'
#         self.item_index = torch.as_tensor(item_index)

#     def forward(self, inputs):
#         x, _ = inputs
#         return x


class Agent47Net(nn.Module):
    def __init__(self, input_dim, output_dim, batch_size):
        super().__init__()
        
        self.batch_size = batch_size
        
        #Training model
        self.online = nn.Sequential(
            nn.Conv2d(in_channels=input_dim, out_channels=100, kernel_size=2, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=100, out_channels=50, kernel_size=2, stride=2, padding=2),
            nn.ReLU(),
            nn.Flatten(0,1)
        )
        self.online_rnn = nn.LSTM(input_size=32, num_layers=3, hidden_size=self.batch_size),
        self.online_out = nn.Sequential(
            nn.Flatten(0,2),
            nn.Linear(38400, output_dim)
        )
        
        #frozen target Q
        self.target = copy.deepcopy(self.online)
        self.target_rnn = copy.deepcopy(self.online_rnn)
        self.target_out = copy.deepcopy(self.online_out)
        
        for p in self.target.parameters():
            p.requires_grad = False   
        
    
    def forward(self, inputs, model):
        inputs = inputs.float()
        x = inputs
        if model == 'online':
            x = self.online(x)
            x, _ = self.online_rnn(x)
            x = self.online_out(x)
            return x
        elif model == 'target':
            x = self.target(x)
            x, _ = self.target_rnn(x)
            x = self.target_out(x)
            return x

    


エージェント

In [106]:
class Agent47:
    def __init__(self, state_dim, action_dim, save_dir, load=False, load_file=''):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.save_dir = save_dir
        self.load = load
        
        self.exploration_rate = 1
        self.decay_rate = 0.9995
        self.min_rate = 0.15
        self.curr_step = 0
        
        self.memory = deque(maxlen=10000)
        self.batch_size = 16
        
        self.learn_every = 1  # no. of experiences between updates to Q_online
        self.sync_every = 50  # no. of experiences between Q_target & Q_online sync
        self.save_every = 250 #no. of experiences between saving the model
        self.gamma = 0.9
        
        device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
        self.net = Agent47Net(self.state_dim, self.action_dim, self.batch_size)
        self.net = self.net.to(device)
                
        self.loss_fn = torch.nn.SmoothL1Loss()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=0.00025)
        
        if self.load:
            checkpoint = torch.load(load_file)
            self.net.load_state_dict(checkpoint['model_state_dict'])
            self.exploration_rate = checkpoint['exploration_rate']
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    def act(self, state):
        """
        Return the action to do at given state
        """
        if np.random.rand() < self.exploration_rate:
            action_id = np.random.randint(self.action_dim)
        else:
            state = state.__array__() 
            state = torch.tensor(state).cuda()
            state = state.unsqueeze(0)
            state = torch.cat(self.batch_size * [state])
            print("act shape", state.shape)
            action_vals = self.net(state, model="online") #action scores from NN
            action_id = torch.argmax(action_vals, axis=0).item() #value of action with highest score
        
        if self.exploration_rate >= self.min_rate:
            self.exploration_rate -= 1-self.decay_rate #linear decay
        else:
            self.exploration_rate = self.min_rate
        
        self.curr_step += 1
        return action_id #action to take at this step
    
    def cache(self, state, next_state, action, reward):
        """
        Store experience to memory
        """
        state = state.__array__()
        next_state = next_state.__array__()
        
        state = torch.tensor(state).cuda()
        next_state = torch.tensor(next_state).cuda()
        action = torch.tensor([action]).cuda()
        reward = torch.tensor([reward]).cuda()

        self.memory.append((state, next_state, action, reward))
    
    def recall(self):
        """
        Retrieve/"remember" experiences
        """
        batch = random.sample(self.memory, self.batch_size)
        state, next_state, action, reward = map(torch.stack, zip(*batch))
        return state, next_state, action.squeeze(), reward.squeeze()       
    
    def td_estimate(self, state, action):
        """
        temporal difference Q estimate at current state
        """
        current_Q = self.net(state, model="online")[action]
        
        return current_Q

    def td_target(self, reward, next_state):
        """
        get td target
        """
        next_state_Q = self.net(next_state, model="online")
        best_action = torch.argmax(next_state_Q, axis=0)
        next_Q = self.net(next_state, model="target")[best_action]
        return (reward + (self.gamma * next_Q).float())     
    
    def update_Q(self, td_estimate, td_target):
        """
        update parameters
        """
        loss = self.loss_fn(td_estimate, td_target)
        self.optimizer.zero_grad()
        #self.net.train()
        loss.backward()
        self.optimizer.step()
        #self.net.eval()
        return loss.item()
    
    def sync_Q_target(self):
        """
        sync Q target with online periodically instead of backpropagation
        """
        self.net.target.load_state_dict(self.net.online.state_dict())

    def save(self):
        """
        save model in directory
        """
        now = dt.datetime.now().strftime('%B-%d-%Y')
        save_path = (
            self.save_dir / f"saved_agents/with_position/{now}/agent47_net_{int(self.curr_step // self.save_every)}.chkpt"
        )
        torch.save(
            {
                'model_state_dict': self.net.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'exploration_rate': self.exploration_rate
            },
            save_path,
        )
        print(f"Agent47 saved to {save_path} at step {self.curr_step}")
        
    def learn(self):
        """
        Learning steps
        """
        if self.curr_step % self.sync_every == 0:
            self.sync_Q_target()

        if self.curr_step % self.save_every == 0:
            self.save()
            
        if self.curr_step < 4 * self.batch_size:
            return None, None
        
        if self.curr_step % self.learn_every != 0:
            return None, None

        # Sample from memory
        state, next_state, action, reward = self.recall()

        # Get TD Estimate
        td_est = self.td_estimate(state, action)

        # Get TD Target
        td_tgt = self.td_target(reward, next_state)
        
        # Backpropagate loss through Q_online 
        loss = self.update_Q(td_est, td_tgt)

        return (td_est.mean().item(), loss) #(estimate of Q, loss)

### Training

In [107]:
torch.backends.cudnn.enabled=False

t = torch.cuda.get_device_properties(0).total_memory
print("GPU available:", torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print("Device total memory: {} GB".format(round(t/1024**3,2)))

GPU available: True
NVIDIA GeForce RTX 3080
Device total memory: 9.75 GB


In [108]:
def get_reward(equity1):
    """
    return reward value
    """
    summary = client.get_account_summary()
    equity2 = float(summary["equity"])
    return (equity2-equity1)
    
def get_state():
    """
    get state at time of function call
    currently only order book
    """
    curr_time = round(time.time())
    now = dt.datetime.now().strftime('%B %d, %Y %H:%M:%S')
    tick_data = get_historical_data(t1=curr_time-120, t2=curr_time)
    order_book = client.get_order_book(instrument='BTC-PERPETUAL', depth=60)
    position_size = client.get_position()["size"]
    open_orders = client.get_open_orders()
    bids = order_book["bids"]
    asks = order_book["asks"]
    position_and_orders = [[position_size, 0]] * 60
    for i in range(len(open_orders)):
        position_and_orders[i][1] = open_orders["profit_loss"]
    
    return [bids, asks, position_and_orders]

def print_memory_usage(current_actions, episode):
    """
    print GPU memory usage by CUDA
    """
    r = torch.cuda.memory_reserved(0)
    a = torch.cuda.memory_allocated(0)
    f = r-a  # free inside reserved
    print(torch.cuda.get_device_name(0))
    print("Reserved memory: {} GB".format(round(r/1024**3,3)))
    print("Allocated memory: {} GB".format(round(a/1024**3,3)))
    print("Amount free in reserved: {} GB".format(round(f/1024**3,3)))

def print_state(action, reward, q, loss):
    print("action: ", action)
    print("reward: ", reward)
    print("estimated q: ", q)
    print("loss: ", loss)



Model/agent init

In [109]:
save_dir = Path("checkpoints") 
now = dt.datetime.now().strftime('%B-%d-%Y')
model_path = save_dir / f"saved_agents/with_position/{now}/"
if not os.path.isdir(model_path):
    os.mkdir(model_path)
    os.mkdir(model_path / "logs")
    f = open(model_path/ "logs/log", "w")
    f.close()
agent = Agent47(state_dim=3, action_dim=7, save_dir=save_dir, load=False, 
    load_file='')

Visualise model

In [110]:
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
from torchsummary import summary

state = np.array(get_state(), dtype="float64").T
state = state.__array__() 
state = torch.tensor(state).cuda()
state = state.unsqueeze(0)
state = torch.movedim(state, 3, 1)
state = torch.cat(agent.batch_size * [state])
test_net = agent.net
print("state shape", state.shape)
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
test_net.to(device)
output = test_net(state, model="online")
summary(test_net, state, "online")
print(test_net)
print("output shape", output.shape)

state shape torch.Size([16, 3, 2, 60])


TypeError: 'tuple' object is not callable

Playing the game

In [None]:
from functools import partial

logger = MetricLogger(model_path)
actions = [partial(client.order, instrument_name="BTC-PERPETUAL", side="long", amount=2500, order_type="market"), 
           partial(client.order, instrument_name="BTC-PERPETUAL", side="short", amount=2500, order_type="market"),
           partial(client.make_futures_order, side="long", instrument="BTC-PERPETUAL", amount=2500),
           partial(client.make_futures_order, side="short", instrument="BTC-PERPETUAL", amount=2500),
           partial(client.close_position, instrument="BTC-PERPETUAL", order_type="limit"),
           client.cancel_all_orders,
           client.do_nothing
          ]

episodes = 5
action_num = 1000 #1000 actions in an episode
equity = client.get_account_summary()["equity"]

for e in range(episodes): 
    #play the game:
    step = 0
    #get state from deribit
    state = np.array(get_state(), dtype="float64").T

    while step < action_num:
                
        clear_output(wait=True)
        print("Action {} in episode {}".format(step + 1, e + 1))
        print_memory_usage(step, e + 1) 
        
        #agent runs on the state
        action = agent.act(state) #action id
        try:
            print("Action index: {}".format(action))
            actions[action]()
        except:
            print("Empty or error, continuing until result")
            continue
        try:
            next_state = np.array(get_state(), dtype="float64").T
        except:
            print("not enough order book values (probably). Continuing")
            continue
            
        reward = get_reward(equity) #reward calculated as total equity increase over the entire session
        agent.cache(state, next_state, action, reward)
        q, loss = agent.learn()
        logger.log_step(reward, loss, q, action, step)
        
        state = next_state
        print_state(action, reward, q, loss)
        step += 1

    logger.log_episode()
    logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)
