Colonel Blotto and his arch-enemy, Boba Fett, are at war. Each commander has S soldiers in total,
and each soldier can be assigned to one of $N<S$ battlefields. Naturally, these commanders do not
communicate and hence direct their soldiers independently. Any number of soldiers can be allocated
to each battlefield, including zero. A commander claims a battlefield if they send more soldiers to
the battlefield than their opponent. Each commander’s job is to split his pool of soldiers into
groups, one assigned to each battlefield. The winning commander is the one who claims the
most battlefields. For example, with (S, N) = (10, 4), Colonel Blotto may choose to play (2, 2, 2, 4)
while Boba Fett may choose to play (8, 1, 1, 0). In this case, Colonel Blotto would win by claiming
three of the four battlefields. The war ends in a draw if both commanders claim the same number of
battlefields.

In [21]:
import numpy as np
import random as rand
from tqdm import tqdm

In [50]:
# Defaults reproduce the original case where S = 5 and N = 3
class ColonelBlotto:
    """Regret-matching learner for one commander in the Colonel Blotto game.

    An action is one way of splitting ``num_soldiers`` soldiers over
    ``num_battlefields`` battlefields (zeros allowed). Actions are numbered
    in lexicographic order of the allocation tuple, so with the default
    arguments index 0 is ``(0, 0, 5)`` and index 20 is ``(5, 0, 0)``.
    """

    def __init__(self, num_soldiers: int = 5, num_battlefields: int = 3):
        """Enumerate every allocation and reset the learning accumulators.

        Args:
            num_soldiers: Total soldiers each commander distributes (S).
            num_battlefields: Number of battlefields (N).

        Raises:
            ValueError: If ``num_battlefields < 1`` or ``num_soldiers < 0``.
        """
        if num_battlefields < 1:
            raise ValueError("num_battlefields must be at least 1")
        if num_soldiers < 0:
            raise ValueError("num_soldiers must be non-negative")

        allocations = list(self._allocations(num_soldiers, num_battlefields))

        # action index -> allocation tuple (one entry per battlefield)
        self.states = dict(enumerate(allocations))
        self.NUM_ACTIONS = len(allocations)

        # Same allocations as a (NUM_ACTIONS, num_battlefields) array so that
        # utilities can be computed without a Python-level loop.
        self._state_matrix = np.array(allocations, dtype=int).reshape(
            self.NUM_ACTIONS, num_battlefields
        )

        # Running sum of every strategy returned by get_strat()
        self.strategySum = np.zeros(self.NUM_ACTIONS)
        # Running sum of regrets accumulated by update_regrets()
        self.regretSum = np.zeros(self.NUM_ACTIONS)

    @staticmethod
    def _allocations(total: int, parts: int):
        """Yield, in lexicographic order, all tuples of ``parts`` non-negative
        integers that sum to ``total``."""
        if parts == 1:
            yield (total,)
            return
        for first in range(total + 1):
            for rest in ColonelBlotto._allocations(total - first, parts - 1):
                yield (first,) + rest

    def get_strat(self) -> np.ndarray:
        """Return the current regret-matching strategy.

        Probabilities are proportional to the positive part of ``regretSum``;
        when no action has positive regret the strategy is uniform.

        Side effect: the returned strategy is added to ``strategySum``.
        """
        positive_regret = np.maximum(self.regretSum, 0.0)
        total = positive_regret.sum()

        if total > 0:
            strat = positive_regret / total
        else:
            strat = np.ones(self.NUM_ACTIONS) / self.NUM_ACTIONS

        self.strategySum += strat

        return strat

    def get_action(self, strat):
        """Sample an action index from the probability vector ``strat``
        using numpy's global random state."""
        return np.random.choice(self.NUM_ACTIONS, p=strat)

    def get_utility_array(self, opp_action) -> np.ndarray:
        """Return the payoff of every own action against ``opp_action``.

        The payoff is +1 if the action claims more battlefields than the
        opponent, -1 if it claims fewer and 0 for a draw. With three
        battlefields and equal army sizes the net battlefield count can only
        be -1, 0 or +1, so for the default game this equals the net count.

        Args:
            opp_action: Index (key of ``states``) of the opponent's action.

        Returns:
            A new float array of length ``NUM_ACTIONS``.

        Raises:
            KeyError: If ``opp_action`` is not a valid action index.
        """
        opp_army = np.asarray(self.states[opp_action])

        # Per battlefield: +1 where we send more soldiers, -1 where fewer.
        net_fields = np.sign(self._state_matrix - opp_army).sum(axis=1)

        return np.sign(net_fields).astype(float)

    def update_regrets(self, my_action, action_utilities):
        """Add to ``regretSum`` the regret of each action relative to the
        action actually played.

        Args:
            my_action: Index of the action that was played.
            action_utilities: Payoff of every action for this round, as
                returned by ``get_utility_array``.
        """
        utilities = np.asarray(action_utilities, dtype=float)
        self.regretSum += utilities - utilities[my_action]

    def getAverageStrategy(self) -> np.ndarray:
        """Return ``strategySum`` normalised to a probability vector.

        Falls back to the uniform strategy when nothing positive has been
        accumulated yet. Does not modify any state.
        """
        accumulated = np.maximum(self.strategySum, 0.0)
        total = accumulated.sum()

        if total > 0:
            return accumulated / total

        return np.ones(self.NUM_ACTIONS) / self.NUM_ACTIONS

In [23]:
def train(p1 : ColonelBlotto, p2: ColonelBlotto, epochs : int):
    """Play ``epochs`` rounds between ``p1`` and ``p2``, updating both players' regrets.

    Each round both players fetch their current strategy, sample an action
    from it, score every one of their own actions against the action the
    opponent sampled, and add the resulting regrets. Progress is reported
    through ``tqdm``. Nothing is returned; the players are mutated in place.
    """
    for _ in tqdm(range(epochs)):
        # Strategies are fixed for the round before either player samples.
        strat_one = p1.get_strat()
        strat_two = p2.get_strat()

        # Sampling order (p1 first, then p2) is kept as in the random stream.
        move_one = p1.get_action(strat_one)
        move_two = p2.get_action(strat_two)

        # Each player is scored against the opponent's sampled move.
        p1.update_regrets(move_one, p1.get_utility_array(move_two))
        p2.update_regrets(move_two, p2.get_utility_array(move_one))

In [56]:
# Two independent learners, trained against each other for one million rounds.
p1, p2 = ColonelBlotto(), ColonelBlotto()

train(p1, p2, 10**6)

100%|██████████| 1000000/1000000 [01:08<00:00, 14556.51it/s]


In [57]:
nash1 = p1.getAverageStrategy()

# Print only the allocations holding more than 1% of player 1's average strategy.
for i, p in enumerate(nash1):
    if not p > 0.01:
        continue
    print(p1.states[i], p)

(0, 2, 3) 0.11755366368157785
(0, 3, 2) 0.10342614676092338
(1, 1, 3) 0.10895944510060597
(1, 3, 1) 0.11797041474666159
(2, 0, 3) 0.11208540538714598
(2, 3, 0) 0.11027568174949157
(3, 0, 2) 0.10922596100949618
(3, 1, 1) 0.11611928097536584
(3, 2, 0) 0.10436001057034618


In [58]:
nash2 = p2.getAverageStrategy()

# Print only the allocations holding more than 1% of player 2's average strategy.
for i, p in enumerate(nash2):
    if not p > 0.01:
        continue
    print(p2.states[i], p)

(0, 2, 3) 0.11500439601409777
(0, 3, 2) 0.10584833971572975
(1, 1, 3) 0.11250868241723966
(1, 3, 1) 0.1145796191246138
(2, 0, 3) 0.10686288993853836
(2, 3, 0) 0.11204836672674937
(3, 0, 2) 0.11316198628717061
(3, 1, 1) 0.11369029421570248
(3, 2, 0) 0.10623488651242435
