# Imports

In [6]:
import torch
from multi_cmd import cgd_utils
import numpy as np


# Game Setup

In [10]:
# Game constants
num_evaders = 2
num_exits = 7
points_per_inspector = [3, 3, 1]
num_inspectors = len(points_per_inspector)

# Computing helper values for calculating expected payoffs
bounds = [0] + list(np.cumsum(points_per_inspector))
inspector_ranges = [(bounds[i], bounds[i+1]) for i in range(num_inspectors)]

# Check that each point has exactly one inspector.
assert(sum(points_per_inspector) == num_exits)

# Calculate game payoffs
def calculate_expected_payoffs(evader_player_list, inspector_player_list):
    '''Given lists of evader probabilities and inspector probabilities, compute expected payoffs'''
    
    # Normalize each evader's probabilities to [0, 1] using softmax.
    normalized_evader_list = (
        [torch.nn.Softmax(dim=0)(evader_tensor)
         for evader_tensor in evader_player_list])
    
    # Normalize each inspector's probabilities to [0,1] using softmax.
    normalized_inspector_list = (
        [torch.nn.Softmax(dim=0)(inspect_tensor)
         for inspect_tensor in inspector_player_list])
     
    # Evader probability of gaining 1 calculated with probability that inspector does not choose this point.
    inspector_probabilities = 1 - torch.cat(normalized_inspector_list)
    
    # Get evader expected payoffs in-order of evader, which is probability that
    # evader choses a point, that the inspector does not (i.e. complement)
    evader_payoffs = [-torch.dot(evader_tensor, inspector_probabilities) 
                      for evader_tensor in normalized_evader_list]
    
    # Define list of inspector payoffs
    inspector_payoffs = [torch.tensor(0.) 
                         for _ in range(num_inspectors)]
    
    # For inspector, payoff is probability that evader and inspector both chose the same exit.
    for evader_tensor in normalized_evader_list:
        for i, ((start, stop), inspector_tensor) in (
                enumerate(zip(inspector_ranges, normalized_inspector_list))):
            
            inspector_payoffs[i] += -torch.dot(inspector_tensor, evader_tensor[start: stop])
            
    return evader_payoffs, inspector_payoffs
            

# Simulating Invader Evader with Random Starting Weights

In [12]:
num_iterations = 1000
learning_rates = [0.1] * (num_evaders + num_inspectors)

# Define initial probability-ish tensors for evaders and invaders
evader_player_list = []
for _ in range(num_evaders):
    random_samples = torch.tensor(np.random.uniform(size=(num_exits)), requires_grad=True)
    evader_player_list.append(random_samples / torch.norm(random_samples, 1))

inspector_player_list = []
for num_points in points_per_inspector:
    random_samples = torch.tensor(np.random.uniform(size=(num_points)), requires_grad=True)
    inspector_player_list.append(random_samples / torch.norm(random_samples, 1))                            
                                
print("Initial Evader Probabilities")
for tensor in evader_player_list:
    print("   " + str(tensor))

print()
    
print("Initial Inspector Probabilities")
for tensor in inspector_player_list:
    print("    " + str(tensor))

# Simulate multiple rounds of game
for i in range(num_iterations):
    if (i % 100 == 0):
        print(i)
        
    evader_payoffs, inspector_payoffs = calculate_expected_payoffs(evader_player_list, inspector_player_list)
    
    updates, _ = cgd_utils.metamatrix_conjugate_gradient(
        evader_payoffs + inspector_payoffs, 
        evader_player_list + inspector_player_list, 
        lr_list=learning_rates)
    
    for player, update in zip(evader_player_list + inspector_player_list, updates):
        player.data.add_(update)
    
#     # Normalize probabilities for next iteration with softmax.
#     evader_player_list = (
#         [torch.nn.Softmax(dim=0)(evader_tensor)
#          for evader_tensor in evader_player_list])
#     inspector_player_list = (
#         [torch.nn.Softmax(dim=0)(inspect_tensor)
#          for inspect_tensor in inspector_player_list])
    
#     evader_player_list = [tensor.clone().detach().requires_grad_(True) for tensor in evader_player_list]
#     inspector_player_list = [tensor.clone().detach().requires_grad_(True) for tensor in inspector_player_list] 

# Look at final normalized probabilities using softmax.
normalized_evader_list = (
        [torch.nn.Softmax(dim=0)(evader_tensor) 
         for evader_tensor in evader_player_list])
normalized_inspector_list = (
        [torch.nn.Softmax(dim=0)(inspect_tensor)
         for inspect_tensor in inspector_player_list])

print("Final Evader Probabilities")
for tensor in evader_player_list:
    print("   " + str(tensor))

print()
    
print("Final Inspector Probabilities")
for tensor in inspector_player_list:
    print("    " + str(tensor))

Initial Evader Probabilities
   tensor([0.0825, 0.1140, 0.0860, 0.1611, 0.2301, 0.1505, 0.1758],
       dtype=torch.float64, grad_fn=<DivBackward0>)
   tensor([0.1772, 0.1351, 0.0419, 0.1810, 0.1949, 0.1664, 0.1035],
       dtype=torch.float64, grad_fn=<DivBackward0>)

Initial Inspector Probabilities
    tensor([0.2958, 0.3363, 0.3678], dtype=torch.float64, grad_fn=<DivBackward0>)
    tensor([0.4959, 0.1417, 0.3625], dtype=torch.float64, grad_fn=<DivBackward0>)
    tensor([1.], dtype=torch.float64, grad_fn=<DivBackward0>)
0
100
200
300
400
500
600
700
800
900
Final Evader Probabilities
   tensor([ 0.4344,  0.4573,  0.4360,  0.4275,  0.7637,  0.5364, -2.0554],
       dtype=torch.float64, grad_fn=<DivBackward0>)
   tensor([ 0.5569,  0.4732,  0.3614,  0.4400,  0.6871,  0.5449, -2.0634],
       dtype=torch.float64, grad_fn=<DivBackward0>)

Final Inspector Probabilities
    tensor([0.3391, 0.3577, 0.3032], dtype=torch.float64, grad_fn=<DivBackward0>)
    tensor([0.2969, 0.4099, 0.2932], dty

# Simulation with Initial Nash Equilbrium Weights

In [5]:
num_iterations = 1000
learning_rates = [1] * (num_evaders + num_inspectors)

# Define initial probability-ish tensors for evaders and invaders
evader_player_list = [torch.tensor([1/6] * num_exits, 
                                   requires_grad=True) 
                      for _ in range(num_evaders)]

inspector_player_list = [torch.tensor([1/2] * num_points, 
                                      requires_grad=True) 
                          for num_points in points_per_inspector]

# Simulate multiple rounds of game
for i in range(num_iterations):

    evader_payoffs, inspector_payoffs = calculate_expected_payoffs(evader_player_list, inspector_player_list)
    
    updates, _ = cgd_utils.metamatrix_conjugate_gradient(
        evader_payoffs + inspector_payoffs, 
        evader_player_list + inspector_player_list, 
        lr_list=learning_rates)
    
    
    for player, update in zip(evader_player_list + inspector_player_list, updates):
        player.data.add_(update)

# Look at final normalized probabilities
normalized_evader_list = (
        [evader_tensor / torch.norm(evader_tensor, 1) 
         for evader_tensor in evader_player_list])
normalized_inspector_list = (
        [inspect_tensor / torch.norm(inspect_tensor, 1) 
         for inspect_tensor in inspector_player_list])

print("Evader Probabilities")
for tensor in normalized_evader_list:
    print("   " + str(tensor))

print()
    
print("Inspector Probabilities")
for tensor in normalized_inspector_list:
    print("    " + str(tensor))

Evader Probabilities
   tensor([0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111],
       grad_fn=<DivBackward0>)
   tensor([0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111, 0.1111],
       grad_fn=<DivBackward0>)

Inspector Probabilities
    tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>)
    tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>)
    tensor([0.3333, 0.3333, 0.3333], grad_fn=<DivBackward0>)
