In [1]:
import copy
import torch

import torch.nn.functional as F

from torch.distributions import (
    Bernoulli, 
    Categorical, 
    Independent, 
    Multinomial, 
    Normal,
)

from longcapital.rl.utils.distributions import MultivariateHypergeometric
from longcapital.utils.constant import NEG_INF

In [2]:
class MyMultinomial(Multinomial):
    """Multinomial whose ``entropy()`` is a constant-zero placeholder.

    The returned value is not a real entropy; it only has the shape callers
    expect from ``entropy()`` (one entry per batch element).
    """

    def entropy(self):
        """Return a zero tensor of shape ``batch_shape``.

        The zeros are allocated from ``self.probs`` so they share its dtype and
        device; a bare ``torch.zeros`` would always produce a default-dtype
        tensor on the default device, which mismatches ``log_prob`` when the
        distribution lives on another device or uses float64.
        """
        return self.probs.new_zeros(self.batch_shape)
    

def test_dist(dist, return_sample=False):
    sample = dist.sample()
    log_prob = dist.log_prob(sample)
    entropy = dist.entropy()
    print(f"sample ({sample.shape}): {sample}")
    print(f"log_prob ({log_prob.shape}): {log_prob}")
    print(f"entropy ({entropy.shape}): {entropy}")
    if return_sample:
        return sample

In [3]:
# Shared configuration for every demo cell below.
SEED = 0  # fixed so Restart & Run All reproduces the same samples
torch.manual_seed(SEED)

batch_size = 4  # number of independent rows per distribution
stock_num = 5  # number of stocks (size of the last dimension of loc/logits)
topk = 2  # number of picks / candidate params used by the discrete demos

In [4]:
# continuous.MetaPPO / MetaDDPG / MetaTD3 / MetaSAC
# Every stock in the given list receives one real value, used either for
# ranking (TopkDropoutStrategy) or for weighting (WeightStrategy).

loc = torch.randn((batch_size, stock_num))
scale = loc.new_ones(loc.shape)
# Treat the last (stock) dimension as the event so log_prob/entropy are per batch row.
dist = Independent(Normal(loc, scale), reinterpreted_batch_ndims=1)
test_dist(dist)

sample (torch.Size([4, 5])): tensor([[ 0.2421,  0.1619, -0.0146, -2.6176, -0.1047],
        [-0.3999, -0.5017,  2.5170, -1.8666,  0.5309],
        [-0.6478, -0.8725, -0.7073,  0.4213,  0.0070],
        [-1.6563,  3.0381,  1.4929, -1.2735, -1.5603]])
log_prob (torch.Size([4])): tensor([-6.3076, -6.0465, -5.4885, -9.7413])
entropy (torch.Size([4])): tensor([7.0947, 7.0947, 7.0947, 7.0947])


In [5]:
# discrete.PPO
# From the state and a set of candidate params (n_drop), pick one param for trading.

n_drop_list = [*range(topk)]
# Random non-negative weights, one column per candidate; Categorical normalizes
# them along the last dimension.
probs = torch.rand((batch_size, len(n_drop_list)))
dist = Categorical(probs=probs)
test_dist(dist)

sample (torch.Size([4])): tensor([0, 1, 1, 1])
log_prob (torch.Size([4])): tensor([-0.0508, -0.6032, -0.4094, -0.5834])
entropy (torch.Size([4])): tensor([0.1971, 0.6887, 0.6383, 0.6864])


In [6]:
# discrete.MultiBinaryMetaPPO
# given a list of stocks, for each stock decide whether buy or not (each stock is independent)

logits = torch.randn(batch_size, stock_num)
# torch.sigmoid rather than F.sigmoid: the functional alias emits a deprecation
# warning on a range of PyTorch releases, while torch.sigmoid is always supported.
probs = torch.sigmoid(logits)
dist = Independent(Bernoulli(probs), 1)
sample = test_dist(dist, return_sample=True)
# Recompute log_prob by hand as a cross-check: pick p where the stock was bought
# (sample == 1) and 1 - p where it was not, then sum the logs over the stock dimension.
log_prob = (sample * probs + (1 - sample) * (1 - probs)).log().sum(dim=-1)
print(log_prob)

sample (torch.Size([4, 5])): tensor([[1., 1., 1., 0., 0.],
        [0., 0., 1., 0., 0.],
        [1., 0., 1., 1., 0.],
        [0., 1., 0., 0., 0.]])
log_prob (torch.Size([4])): tensor([-2.8576, -2.9559, -2.4545, -3.1865])
entropy (torch.Size([4])): tensor([3.2848, 3.3272, 2.7741, 3.1801])
tensor([-2.8576, -2.9559, -2.4545, -3.1865])




In [7]:
# discrete.StepByStepMetaPPO
# given a list of stocks, repeat topk times to select stock one by one to buy WITHOUT replacement
# if the state is not changed, this is equivalent to TopkMetaPPO which selects topk all at once

# Work on a copy: the masking below is in-place and `logits` is reused by later cells.
# Tensor.clone() is the idiomatic tensor copy and, unlike copy.deepcopy, also works
# for non-leaf tensors.
logits_ = logits.clone()
for i in range(topk):
    dist = Categorical(logits=logits_)
    sample = test_dist(dist, return_sample=True)
    # mask out selected ones so they cannot be drawn again in the next round
    logits_.scatter_(1, sample.unsqueeze(1), NEG_INF)

sample (torch.Size([4])): tensor([2, 2, 4, 2])
log_prob (torch.Size([4])): tensor([-1.7199, -0.9449, -4.0417, -0.7596])
entropy (torch.Size([4])): tensor([1.5366, 1.4922, 1.2386, 1.3566])
sample (torch.Size([4])): tensor([3, 4, 2, 1])
log_prob (torch.Size([4])): tensor([-1.3575, -1.1661, -0.6317, -0.7316])
entropy (torch.Size([4])): tensor([1.2992, 1.3480, 1.1707, 1.2506])


In [8]:
# discrete.TopkMetaPPO
# From the given list of stocks, draw topk stocks to buy WITHOUT replacement.

# Normalize the shared logits across the stock dimension.
probs = logits.softmax(dim=1)
dist = MultivariateHypergeometric(topk=topk, probs=probs)
test_dist(dist)

sample (torch.Size([4, 2])): tensor([[1, 3],
        [2, 3],
        [2, 1],
        [2, 1]])
log_prob (torch.Size([4])): tensor([-2.3067, -2.2749, -1.7333, -1.4913])
entropy (torch.Size([4])): tensor([1.5366, 1.4922, 1.2386, 1.3566])


In [9]:
# discrete.WeightMetaPPO
# From the given list of stocks, draw topk times WITH replacement which stock to buy;
# equivalently, given a budget (e.g., total_count=topk), choose one stock to buy each time.

# Normalize the shared logits across the stock dimension.
probs = logits.softmax(dim=1)
dist = MyMultinomial(total_count=topk, probs=probs)
test_dist(dist)

sample (torch.Size([4, 5])): tensor([[0., 1., 1., 0., 0.],
        [0., 0., 0., 2., 0.],
        [0., 1., 0., 1., 0.],
        [1., 1., 0., 0., 0.]])
log_prob (torch.Size([4])): tensor([-2.1648, -3.6443, -2.6480, -3.2730])
entropy (torch.Size([4])): tensor([0., 0., 0., 0.])
