In [1]:
import numpy as np
import torch
import torch.nn.functional as F

In [2]:
# Number of dice held by each player, indexed by player id: Player(i) rolls
# N_DICE[i] dice, and Node sizes its per-player matrices from these counts.
N_DICE = (1, 1) # tuple (number of dice P1, number of dice P2)

class Die():
    """A die whose possible outcomes are the strings listed in ``faces``.

    Attributes:
        result: the most recent outcome, i.e. the last face of the latest roll.
    """
    faces = ["L", "2", "3", "4", "5", "6"] # Llamas first

    def __init__(self):
        # Roll once on creation so that ``result`` is always defined.
        self.roll()

    def roll(self, n=1):
        """Roll the die ``n`` times.

        Faces are drawn with ``np.random.choice`` (uniform by default); the
        final draw is remembered in ``self.result``.

        Returns:
            NumPy array holding the ``n`` drawn faces, in draw order.
        """
        outcomes = np.random.choice(Die.faces, n)
        self.result = outcomes[-1]
        return outcomes
        

class Player():
    """A participant in the game, plus the ordered action space shared by all.

    Class attributes:
        action_space: 1-D NumPy array of action strings. Bids come first, in
            ascending order (Node only offers actions located after the
            previous one), followed by "D" (doubt) and "C" (call).

    Instance attributes:
        player_id: index of this player into N_DICE.
        die: the Die used for rolling.
        private: array with the N_DICE[player_id] faces this player rolled.
    """
    # Candidate actions before the llama adjustment:
    #   1. every face repeated i times, for i = 1 .. total number of dice;
    #   2. "L" repeated (total + 1) .. (2 * total) times;
    #   3. the two non-bid actions.
    # np.concatenate is used rather than the np.concat alias, which only exists
    # from NumPy 2.0 onwards.
    action_space = np.concatenate(
        [[face * i for face in Die.faces] for i in range(1,np.sum(N_DICE) + 1)] +
        [["L" * i for i in range(np.sum(N_DICE) + 1, 2 * np.sum(N_DICE) + 1)]] +
        [["D", "C"]] # doubt or call
    )
    # Llama adjustment: an "L" run of even length 2k is shortened to k llamas but
    # keeps its slot in the ordering; odd-length runs are dropped. Presumably this
    # encodes that a llama counts twice when ranking bids -- confirm with the rules.
    _odd_llama_runs = []
    for _idx, _action in enumerate(action_space):
        if _action[0] == "L":
            if len(_action) % 2 == 1:
                _odd_llama_runs.append(_idx)
            else:
                action_space[_idx] = _action[:len(_action)//2]
    action_space = np.delete(action_space, _odd_llama_runs)
    # The loop temporaries would otherwise linger as accidental class attributes.
    del _odd_llama_runs, _idx, _action
    print(action_space)

    def __init__(self, player_id):
        """Create player ``player_id`` and roll that player's private dice.

        Args:
            player_id: index into N_DICE selecting how many dice are rolled.
        """
        self.player_id = player_id
        self.die = Die()
        self.private = self.die.roll(N_DICE[player_id])


class Node():
    """A node of the game tree; constructing the root builds the whole tree.

    Players alternate by depth, starting with player 0 at the root. Every
    non-leaf node owns a logits tensor with one row per possible private roll
    of the player to move and one column per action available at that node.
    A node reached through "D" or "C" is a leaf.

    Class attributes (they describe the most recently built tree):
        n_nodes: number of nodes in the tree.
        leaves: all leaf nodes, in creation order.
        player_logits: player_logits[p] lists the logits tensors of the nodes
            at which player p moves.
    """
    n_nodes = 0
    leaves = []
    player_logits = [[], []]

    def __init__(self, parent, last_action=None):
        """Create the node and, recursively, its entire subtree.

        Args:
            parent: the parent Node, or None to start a new tree at its root.
            last_action: the action string that led to this node (None for
                the root).
        """
        self.parent = parent
        if parent is None:
            self.player = 0 # start with player zero
            self.probability = torch.ones([len(Die.faces) ** n for n in N_DICE]) # certain to hit this node
            # A new root starts a new tree, so every class-level registry is
            # reset. Leaving `leaves` out would let a second tree pile its
            # leaves on top of the first one's while n_nodes starts over.
            Node.player_logits = [[], []]
            Node.leaves = []
            Node.n_nodes = 0
        else:
            self.player = (self.parent.player + 1) % 2
            self.probability = None # filled in by propagate_probability()
        Node.n_nodes += 1
        self.last_action = last_action
        self.is_leaf = last_action == "D" or last_action == "C"
        self.children = {} # empty dictionary. Keys are actions and values are Nodes.
        if self.is_leaf:
            self.logits = None
            Node.leaves.append(self)
        else:
            # Only actions located after the previous one in the ordered
            # action space may be played.
            if last_action is None:
                starting_index = 0
            else:
                starting_index = np.where(Player.action_space == last_action)[0][0] + 1
            possible_actions = Player.action_space[starting_index:]
            if last_action is None:
                # The opening move may be neither a llama bid nor "D"/"C".
                possible_actions = np.delete(possible_actions,
                                             np.where([x[0] in ("L", "D", "C") for x in possible_actions])[0])
            n_actions = len(possible_actions)
            # Number of distinct (ordered) private rolls of the player to move.
            n_private = len(Die.faces) ** N_DICE[self.player]

            self.logits = torch.randn(n_private, n_actions, requires_grad=True)
            Node.player_logits[self.player].append(self.logits)

            # Children are inserted in action order, which is also the column
            # order of `logits`; propagate_probability() relies on that.
            for action in possible_actions:
                self.children[action] = Node(parent=self, last_action=action)

    def propagate_probability(self):
        """Push reach probabilities from this node down to all descendants.

        Requires ``self.probability`` to be set (the root has it from
        construction). Calling this on a leaf does nothing, because a leaf has
        neither logits nor children.
        """
        assert self.probability is not None
        if self.is_leaf:
            return
        # the probability is a matrix with rows possible private info of self.player and
        # columns possible private info of the opponent. Each entry corresponds to the
        # conditional probability of arriving at that node given the private information.
        softmaxed = F.softmax(self.logits, dim=-1)
        for i, child in enumerate(self.children.values()): # correct order verified.
            # Column i holds, per private roll, the chance of playing action i.
            give_prob = softmaxed[:, i].unsqueeze(1)
            # Transpose so that the rows belong to the child's player to move.
            child.probability = (give_prob * self.probability).t()
            if not child.is_leaf:
                child.propagate_probability()
        

# Seed both random number generators so that the dice rolls (NumPy) and the
# initial strategy logits (PyTorch) are identical on every Restart & Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# One Player per entry of N_DICE; creating a player rolls that player's dice.
PLAYERS = [Player(i) for i in range(len(N_DICE))]
# Constructing the root recursively builds the whole game tree.
grandfather_node = Node(None)

['2' '3' '4' '5' '6' 'L' '22' '33' '44' '55' '66' 'LL' 'D' 'C']


In [3]:
# Spot-check the logits of one decision node two bids deep ("5" then "55").
print(grandfather_node.children["5"].children["55"].logits)
# Tree size summary, one value per line.
print(
    Node.n_nodes,                     # all nodes
    len(Node.leaves),                 # leaf nodes
    Node.n_nodes - len(Node.leaves),  # non-leaf nodes, i.e. those holding logits
    [len(logits_group) for logits_group in Node.player_logits],  # logits tensors per player
    sep="\n",
)

tensor([[-0.2975,  0.9789,  0.4605, -2.1705],
        [ 0.7175, -1.4465, -0.5294,  0.4619],
        [-0.4861, -0.0653, -0.4757, -0.3520],
        [ 0.0023, -1.7453,  0.1405,  0.0548],
        [ 0.2050, -0.3730,  0.8397,  0.4215],
        [ 1.5945,  0.9045,  0.9514, -0.0138]], requires_grad=True)
12091
8060
4031
[2016, 2015]


In [4]:
# A leaf has no probability until the root pushes its own down the tree.
print(Node.leaves[0].probability)
grandfather_node.propagate_probability()
print(Node.leaves[0].probability)

# Bring every leaf matrix into the same orientation before stacking: a leaf
# whose `player` is 1 has player 1's private rolls on the rows, so it is
# transposed to put player 0's private rolls on the rows like the others.
stacked = torch.stack(
    [leaf.probability if leaf.player != 1 else leaf.probability.t()
     for leaf in Node.leaves],
    dim=0,
)

print(stacked.shape)
# Summing over all leaves gives, per pair of private rolls, the total
# probability of ending in some leaf.
stacked.sum(dim=0)



None
tensor([[4.8727e-14, 7.6791e-16, 2.4611e-14, 1.0148e-16, 2.1000e-13, 2.8977e-15],
        [7.8525e-14, 1.2375e-15, 3.9661e-14, 1.6355e-16, 3.3842e-13, 4.6697e-15],
        [7.4561e-13, 1.1750e-14, 3.7659e-13, 1.5529e-15, 3.2133e-12, 4.4339e-14],
        [1.9067e-12, 3.0049e-14, 9.6303e-13, 3.9712e-15, 8.2174e-12, 1.1339e-13],
        [1.3576e-13, 2.1395e-15, 6.8568e-14, 2.8275e-16, 5.8508e-13, 8.0732e-15],
        [3.1207e-12, 4.9180e-14, 1.5762e-12, 6.4995e-15, 1.3449e-11, 1.8558e-13]],
       grad_fn=<TBackward0>)
torch.Size([8060, 6, 6])


tensor([[1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]],
       grad_fn=<SumBackward1>)

In [5]:
# Scratch check of Tensor.expand: a (4, 1) column is expanded to (4, 5), so
# every row repeats a single value.
a = torch.randn(4, 1).expand((4, 5))
b = torch.randn(4, 5)  # same target shape; not used further in this cell
print(a)


tensor([[ 0.7405,  0.7405,  0.7405,  0.7405,  0.7405],
        [-0.3204, -0.3204, -0.3204, -0.3204, -0.3204],
        [-0.2310, -0.2310, -0.2310, -0.2310, -0.2310],
        [ 0.0985,  0.0985,  0.0985,  0.0985,  0.0985]])


### Insights from von Neumann's work and ChatGPT

- At each optimization step, alternating between the two players, I should optimize the active player's probability distribution so that it maximizes that player's overall expected probability of winning.
- This can be done by computing the expected winning probability of the *entire tree*, and maximizing the probability of the winning leaves by only changing the player's own nodes.

Something like this:
```python
# Optimizers for max (P1) and min (P2)
# NOTE(review): p1_logits, p2_logits and expected_payoff() are not defined in this
# sketch; presumably they stand for each player's strategy logits and the expected
# payoff of the whole tree -- confirm before turning this into a real cell.
opt_p1 = torch.optim.Adam([p1_logits], lr=1e-1)
opt_p2 = torch.optim.Adam([p2_logits], lr=1e-1)

for it in range(5000):
    # — P1 update (ascent) —
    opt_p1.zero_grad()
    ev = expected_payoff()
    # Backpropagating -ev makes the optimizer's minimizing step an ascent on ev.
    # NOTE(review): retain_graph=True looks unnecessary if expected_payoff()
    # rebuilds its graph on every call -- confirm.
    (-ev).backward(retain_graph=True)   # gradient of –E wrt p1_logits
    opt_p1.step()

    # — P2 update (descent) —
    opt_p2.zero_grad()
    ev = expected_payoff()              # evaluated again, after P1's step
    (ev).backward()                     # gradient of  E wrt p2_logits
    opt_p2.step()

    # Report the latest expected value every 500 iterations.
    if it % 500 == 0:
        print(f"iter {it:4d}   EV ≈ {ev.item(): .4f}")
```