In [1]:
import torch

from longcapital.rl.utils.distributions import MultivariateHypergeometric
from longcapital.utils.constant import NEG_INF

In [2]:
class MyMultinomial(torch.distributions.Multinomial):
    def entropy(self):
        return torch.tensor(0.0)
    

def test_dist(dist):
    sample = dist.sample()
    print(f"sample: {sample}")
    print(f"log_prob: {dist.log_prob(sample)}")
    print(f"entropy: {dist.entropy()}")

In [3]:
stock_num = 20
topk = 10

In [4]:
# continuous.MetaPPO/MetaDDPG/MetaTD3/MetaSAC
# given a list of stocks, assign each stock a value for ranking (TopkDropoutStrategy) or weighting (WeightStrategy)
# (each stock is independent)

loc = torch.randn(stock_num).unsqueeze(0)
scale = torch.ones_like(loc).unsqueeze(0)
dist = torch.distributions.Independent(torch.distributions.Normal(loc=loc, scale=scale), 1)
test_dist(dist)

sample: tensor([[[ 0.4384,  2.0295,  0.8916,  0.2447,  2.5761,  0.0384, -1.3316,
           0.4999, -1.2367,  1.1050,  0.1799, -2.1011,  0.7804, -1.5405,
          -2.9782, -0.8935,  1.0627,  1.6370,  1.6496, -3.2920]]])
log_prob: tensor([[-28.3119]])
entropy: tensor([[28.3788]])


In [5]:
# discrete.PPO
# given the state and a set of params candidates (n_drop), choose one param for trading

n_drop_list = list(range(topk))
probs = torch.rand(len(n_drop_list))
dist = torch.distributions.Categorical(probs)
test_dist(dist)

sample: 5
log_prob: -1.401569128036499
entropy: 1.8952332735061646


In [6]:
# discrete.MultiBinaryMetaPPO
# given a list of stocks, for each stock decide whether buy or not (each stock is independent)

probs = torch.rand(stock_num)
dist = torch.distributions.Bernoulli(probs)
test_dist(dist)

sample: tensor([1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0.,
        0., 0.])
log_prob: tensor([-0.6646, -0.4393, -0.0883, -0.3198, -0.2408, -0.2539, -1.0633, -1.0930,
        -0.1103, -0.2586, -1.2609, -0.3197, -0.3170, -1.8816, -0.0276, -0.0739,
        -0.0171, -0.0279, -1.2286, -0.9519])
entropy: tensor([0.6927, 0.6508, 0.2896, 0.5869, 0.5192, 0.5322, 0.6445, 0.6378, 0.3348,
        0.5367, 0.5961, 0.5868, 0.5849, 0.4267, 0.1250, 0.2569, 0.0858, 0.1261,
        0.6045, 0.6669])


In [7]:
# discrete.StepByStepMetaPPO
# given a list of stocks, repeat topk times to select stock one by one to buy WITHOUT replacement

logits = torch.randn(stock_num)
selected = []
for i in range(topk):
    logits[selected] = NEG_INF
    dist = torch.distributions.Categorical(logits=logits)
    test_dist(dist)
    selected.append(int(logits.argmax(-1)))

sample: 3
log_prob: -1.5412710905075073
entropy: 2.4693713188171387
sample: 10
log_prob: -2.819950819015503
entropy: 2.4812870025634766
sample: 0
log_prob: -3.6166977882385254
entropy: 2.504544258117676
sample: 9
log_prob: -2.8392186164855957
entropy: 2.6052680015563965
sample: 17
log_prob: -2.0475592613220215
entropy: 2.6295950412750244
sample: 12
log_prob: -3.989326000213623
entropy: 2.5796475410461426
sample: 10
log_prob: -1.7504608631134033
entropy: 2.536266326904297
sample: 9
log_prob: -2.124523639678955
entropy: 2.5106568336486816
sample: 18
log_prob: -2.5838990211486816
entropy: 2.4358034133911133
sample: 15
log_prob: -2.036226272583008
entropy: 2.34993314743042


In [8]:
# discrete.TopkMetaPPO
# given a list of stocks, repeat topk times to select which stock to buy WITHOUT replacement

logits = torch.randn(stock_num).unsqueeze(0)
dist = MultivariateHypergeometric(logits=logits, topk=topk)
test_dist(dist)

sample: tensor([[11,  9,  0, 18, 10, 16,  5, 14, 19,  1]])
log_prob: tensor([-19.0819])
entropy: tensor([2.4289])


In [9]:
# discrete.WeightMetaPPO
# given a list of stocks, repeat topk times to select which stock to buy WITH replacement
# or given a budget (e.g., total_count=topk), chose one stock to buy each time

logits = torch.randn(stock_num)
dist = MyMultinomial(logits=logits, total_count=topk)
test_dist(dist)

sample: tensor([0., 1., 3., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0.,
        1., 0.])
log_prob: -12.822747230529785
entropy: 0.0
