# Imports

In [4]:
import torch
import cgd_utils
import numpy as np

# Game Setup

In [37]:
# Game constants: how many evaders play, how many exits exist, and how many
# exits each inspector is responsible for.
num_evaders = 3
num_exits = 9
points_per_inspector = [3, 3, 3]
num_inspectors = len(points_per_inspector)

# Running totals of the per-inspector exit counts, with a leading 0, give the
# boundaries between consecutive inspectors' exits.
bounds = [0, *np.cumsum(points_per_inspector)]
# Pair each boundary with the next one to obtain (start, stop) for every
# inspector; stop is exclusive, as in a Python slice.
inspector_ranges = list(zip(bounds[:-1], bounds[1:]))

# The exit counts must add up to the number of exits, i.e. every exit is
# assigned to exactly one inspector.
assert sum(points_per_inspector) == num_exits

# Calculate game payoffs
def calculate_expected_payoffs(evader_player_list, inspector_player_list):
    '''Compute every player's expected payoff term for the inspection game.

    Each input tensor is divided by its own L1 norm before use, so callers may
    pass unnormalized weights. (The result is a probability distribution only
    while all entries of a tensor are non-negative.)

    The exits an inspector covers are derived from the inspector tensors
    themselves: inspector k covers the exits immediately after those of
    inspectors 0..k-1, and covers as many exits as its tensor has entries.
    The function therefore does not depend on any module-level range table.

    Args:
        evader_player_list: list of 1-D tensors, one per evader, holding one
            weight per exit.
        inspector_player_list: list of 1-D tensors, one per inspector, holding
            one weight per exit that inspector covers. Their concatenated
            length must equal the length of each evader tensor.

    Returns:
        A tuple (evader_payoffs, inspector_payoffs) of two lists of 0-dim
        tensors, in the same order as the input lists. Both are negated
        probabilities:
          * evader: -(probability the evader picks an exit that its inspector
            does not pick),
          * inspector: -(sum over evaders of the probability that the evader
            and this inspector pick the same exit).
        NOTE(review): the negation presumably exists because the downstream
        optimizer minimizes these values -- confirm against cgd_utils.
    '''
    # Scale each evader's weights so they sum to 1 in absolute value.
    normalized_evader_list = [
        evader_tensor / torch.norm(evader_tensor, 1)
        for evader_tensor in evader_player_list]

    # Same L1 scaling for each inspector's weights.
    normalized_inspector_list = [
        inspect_tensor / torch.norm(inspect_tensor, 1)
        for inspect_tensor in inspector_player_list]

    # Per-exit complement of the inspection weight, laid out in exit order by
    # concatenating the inspectors' tensors.
    inspector_probabilities = 1 - torch.cat(normalized_inspector_list)

    # Evader term: probability that the evader chooses an exit and the
    # inspector responsible for it does not (hence the complement), negated.
    evader_payoffs = [-torch.dot(evader_tensor, inspector_probabilities)
                      for evader_tensor in normalized_evader_list]

    # Inspector term: probability that an evader and the inspector both choose
    # the same exit, accumulated over all evaders and negated.
    inspector_payoffs = []
    start = 0
    for inspector_tensor in normalized_inspector_list:
        # This inspector's exits are the next len(inspector_tensor) exits.
        stop = start + inspector_tensor.shape[0]
        # Out-of-place accumulation: the running total takes on the dtype of
        # the player tensors instead of being pinned to a float32 buffer.
        payoff = torch.tensor(0.)
        for evader_tensor in normalized_evader_list:
            payoff = payoff - torch.dot(inspector_tensor,
                                        evader_tensor[start:stop])
        inspector_payoffs.append(payoff)
        start = stop

    return evader_payoffs, inspector_payoffs
            

# Simulation

In [42]:
# Number of update steps to run and one learning rate per player
# (evaders first, then inspectors).
num_iterations = 200
learning_rates = [0.1] * (num_evaders + num_inspectors)

# Define initial probability-ish tensors for evaders and inspectors: every
# entry starts at 1, so each player starts uniform once L1-normalized.
# requires_grad=True lets the payoffs be differentiated with respect to them.
evader_player_list = [torch.ones(num_exits, requires_grad=True)
                      for _ in range(num_evaders)]

inspector_player_list = [torch.ones(num_points, requires_grad=True)
                         for num_points in points_per_inspector]

# The player tensors are only ever modified in place, so the combined list can
# be built once and reused across iterations. Its order (evaders, then
# inspectors) matches the order of the concatenated payoff list below.
all_players = evader_player_list + inspector_player_list

for _step in range(num_iterations):
    evader_payoffs, inspector_payoffs = calculate_expected_payoffs(
        evader_player_list, inspector_player_list)

    # NOTE(review): presumably returns one update tensor per player, in the
    # same order as the players passed in -- confirm against cgd_utils. The
    # second return value is not used here.
    updates, _ = cgd_utils.metamatrix_conjugate_gradient(
        evader_payoffs + inspector_payoffs,
        all_players,
        lr_list=learning_rates)

    # Apply the updates in place without recording them in the autograd graph
    # (instead of going through the discouraged `.data` attribute).
    with torch.no_grad():
        for player, update in zip(all_players, updates):
            player.add_(update)

# Report the final strategies, L1-normalized the same way the payoff function
# normalizes them.
normalized_evader_list = [
    evader_tensor / torch.norm(evader_tensor, 1)
    for evader_tensor in evader_player_list]
normalized_inspector_list = [
    inspect_tensor / torch.norm(inspect_tensor, 1)
    for inspect_tensor in inspector_player_list]

print(normalized_evader_list)
print(normalized_inspector_list)

[tensor([0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111],
       grad_fn=<DivBackward0>), tensor([0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111],
       grad_fn=<DivBackward0>), tensor([0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111],
       grad_fn=<DivBackward0>)]
[tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>), tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>), tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>)]
