In [1]:
import copy
import torch

import numpy as np

from torch.distributions import (
    Bernoulli, 
    Categorical, 
    Independent, 
    Multinomial, 
    Normal,
)

from longcapital.rl.utils.distributions import MultivariateCategorical, MultivariateHypergeometric
from longcapital.utils.constant import NEG_INF

In [2]:
class MyMultinomial(Multinomial):
    """``Multinomial`` whose ``entropy`` is replaced by a constant-zero placeholder.

    Sampling and ``log_prob`` are inherited unchanged; only ``entropy`` is
    overridden so that code which calls ``dist.entropy()`` receives a tensor
    with one entry per batch element.
    NOTE(review): the zero value is a stand-in, not the true multinomial
    entropy -- confirm that downstream consumers do not rely on it.
    """

    def entropy(self):
        """Return zeros of shape ``batch_shape``.

        Returns:
            torch.Tensor: all-zero tensor of shape ``self.batch_shape``.
        """
        # new_zeros inherits dtype and device from the distribution's own
        # parameters; a bare torch.zeros would always use the default dtype
        # on the default device and mismatch e.g. float64 or GPU parameters.
        return self.probs.new_zeros(self.batch_shape)
    

def test_dist(dist, return_sample=False):
    sample = dist.sample()
    log_prob = dist.log_prob(sample)
    entropy = dist.entropy()
    print(f"sample ({sample.shape}): {sample}")
    print(f"log_prob ({log_prob.shape}): {log_prob}")
    print(f"entropy ({entropy.shape}): {entropy}")
    if return_sample:
        return sample

In [3]:
# Seed torch's global RNG so that every random tensor and every sample drawn
# in the cells of this notebook is reproducible under Restart & Run All.
seed = 0
torch.manual_seed(seed)

# Sizes shared by the example cells.
batch_size = 4  # leading dimension of every parameter tensor
stock_num = 5   # number of stocks scored / selected per batch row
topk = 2        # number of picks, and size of the candidate parameter lists

In [4]:
# continuous.MetaPPO / MetaDDPG / MetaTD3 / MetaSAC
# Every stock in the list receives one real-valued score, which is then used
# either for ranking (TopkDropoutStrategy) or for weighting (WeightStrategy).

loc = torch.randn((batch_size, stock_num))
scale = torch.ones_like(loc)
# Treat the last dimension (stocks) as a single event, so log_prob and
# entropy are reduced to one value per batch row.
dist = Independent(
    base_distribution=Normal(loc=loc, scale=scale),
    reinterpreted_batch_ndims=1,
)
test_dist(dist)

sample (torch.Size([4, 5])): tensor([[-0.3329,  0.9752, -0.6390,  2.8928,  0.0667],
        [-2.1291, -2.1532, -1.3933,  0.9241,  0.2449],
        [ 1.4556, -1.1203, -0.6360, -2.2502, -2.0621],
        [ 2.0791, -1.5194,  1.5215, -0.5584,  0.2370]])
log_prob (torch.Size([4])): tensor([-6.8761, -7.7059, -6.8598, -7.1360])
entropy (torch.Size([4])): tensor([7.0947, 7.0947, 7.0947, 7.0947])


In [5]:
# discrete.PPO
# Given the state and one list of candidate parameter values (n_drop),
# pick a single candidate to trade with.

n_drop_list = [*range(topk)]
# One unnormalised probability per candidate; Categorical normalises them.
probs = torch.rand((batch_size, len(n_drop_list)))
dist = Categorical(probs=probs)
test_dist(dist)

sample (torch.Size([4])): tensor([0, 1, 1, 1])
log_prob (torch.Size([4])): tensor([-1.0238, -0.0733, -0.7408, -0.3879])
entropy (torch.Size([4])): tensor([0.6530, 0.2554, 0.6921, 0.6280])


In [6]:
# discrete.MultiPPO
# Given the state and several lists of candidate parameter values
# (n_drop, hold_thresh), pick one candidate from each list for trading.

n_drop_list = [*range(topk)]
hold_thresh_list = [*range(1, topk)]
# nvec holds the number of candidates of each parameter, in order.
nvec = np.array(
    [len(candidates) for candidates in (n_drop_list, hold_thresh_list)]
)
# One logit per candidate across all parameters, laid out side by side.
logits = torch.randn(batch_size, nvec.sum())
dist = MultivariateCategorical(nvec=nvec, logits=logits)
test_dist(dist)

sample (torch.Size([4, 2])): tensor([[1, 0],
        [0, 0],
        [1, 0],
        [0, 0]])
log_prob (torch.Size([4])): tensor([-0.2218, -0.1281, -0.5166, -1.7666])
entropy (torch.Size([4])): tensor([0.4989, 0.3674, 0.6744, 0.4573])


In [7]:
# discrete.MultiBinaryMetaPPO
# For each stock in the list, independently decide whether to buy it or not.

logits = torch.randn((batch_size, stock_num))
probs = logits.sigmoid()
dist = Independent(Bernoulli(probs=probs), reinterpreted_batch_ndims=1)
sample = test_dist(dist, return_sample=True)
# Recompute log_prob by hand for comparison with the value printed above:
# take p where the sample is 1 and (1 - p) where it is 0, then sum the logs
# over the stock dimension.
per_stock_prob = sample * probs + (1 - sample) * (1 - probs)
log_prob = per_stock_prob.log().sum(dim=-1)
print(log_prob)

sample (torch.Size([4, 5])): tensor([[0., 0., 0., 1., 1.],
        [1., 0., 1., 1., 1.],
        [0., 1., 1., 0., 1.],
        [0., 1., 0., 1., 1.]])
log_prob (torch.Size([4])): tensor([-1.2924, -2.6365, -3.9904, -3.9840])
entropy (torch.Size([4])): tensor([2.4285, 2.9025, 2.8533, 3.0657])
tensor([-1.2924, -2.6365, -3.9904, -3.9840])


In [8]:
# discrete.StepByStepMetaPPO
# Select stocks to buy one at a time, topk times, WITHOUT replacement.
# If the state does not change between steps, this is equivalent to
# TopkMetaPPO, which selects all topk stocks at once.

# Work on a private copy so the masking below leaves `logits` untouched.
logits_ = logits.detach().clone()
for step in range(topk):
    dist = Categorical(logits=logits_)
    sample = test_dist(dist, return_sample=True)
    # Mask out the stock just selected in each row so that it cannot be
    # drawn again in the next step.
    logits_.scatter_(1, sample.unsqueeze(-1), NEG_INF)

sample (torch.Size([4])): tensor([1, 2, 2, 4])
log_prob (torch.Size([4])): tensor([-4.0210, -1.2195, -1.4670, -1.3039])
entropy (torch.Size([4])): tensor([0.4880, 1.3387, 1.3828, 1.5363])
sample (torch.Size([4])): tensor([4, 0, 1, 3])
log_prob (torch.Size([4])): tensor([-0.1087, -0.6940, -0.5974, -0.9569])
entropy (torch.Size([4])): tensor([0.4054, 1.0386, 1.0954, 1.3062])


In [9]:
# discrete.TopkMetaPPO
# Select which stocks to buy by drawing topk times from the list WITHOUT
# replacement.

# Normalise the per-stock logits into selection probabilities for each row.
probs = logits.softmax(dim=1)
dist = MultivariateHypergeometric(probs=probs)
test_dist(dist)

sample (torch.Size([4, 5])): tensor([[4, 3, 1, 2, 0],
        [4, 2, 0, 3, 1],
        [3, 1, 2, 0, 4],
        [2, 0, 4, 3, 1]])
log_prob (torch.Size([4])): tensor([-1.9890, -3.0844, -3.3981, -4.6636])
entropy (torch.Size([4])): tensor([0.4880, 1.3387, 1.3828, 1.5363])


In [10]:
# discrete.WeightMetaPPO
# Select which stocks to buy by drawing topk times from the list WITH
# replacement; equivalently, given a budget (e.g. total_count=topk), choose
# one stock to buy per unit of budget.

# Normalise the per-stock logits into selection probabilities for each row.
probs = logits.softmax(dim=1)
dist = MyMultinomial(total_count=topk, probs=probs)
test_dist(dist)

sample (torch.Size([4, 5])): tensor([[0., 0., 0., 0., 2.],
        [1., 0., 0., 0., 1.],
        [0., 1., 0., 0., 1.],
        [0., 1., 0., 1., 0.]])
log_prob (torch.Size([4])): tensor([-0.2535, -1.6490, -2.8049, -2.7653])
entropy (torch.Size([4])): tensor([0., 0., 0., 0.])
