### Game theory analysis

In [1]:
from __future__ import division
from random import random
import numpy as np
import pandas as pd

In [2]:
class Coffee_Strategy:
    """Static game description: the available actions and the utility table.

    Everything is stored at class level and read directly, e.g.
    ``Coffee_Strategy.actions`` or ``Coffee_Strategy.utilities``.
    """

    # Coffee-shop categories a player can pick from.
    actions = [
        'bakery',
        'bagels',
        'diner',
        'icecream_and_froyo',
        'purely_coffee',
        'coffee_and_tea',
    ]

    # Number of actions; kept in step with the list above.
    n_actions = len(actions)

    # TODO: read the real utilities into this DataFrame. For now it is built
    # from an empty data list and only carries the action labels on both axes.
    utilities = pd.DataFrame([], index=actions, columns=actions)

In [3]:
class Player:
    """Regret-matching player over the ``Coffee_Strategy`` action set.

    Attributes (each a 1-D float array of length ``Coffee_Strategy.n_actions``,
    initialised to zeros):
        strategy: current mixed strategy, refreshed by ``update_strategy``.
        avg_strategy: normalised ``strategy_sum``, set by ``learn_avg_strategy``.
        strategy_sum: running sum of every strategy produced so far.
        regret_sum: running sum of the per-action regrets.
    """

    def __init__(self, name):
        """Create a player called *name* with all statistics zeroed."""
        n_actions = Coffee_Strategy.n_actions
        self.strategy = np.zeros(n_actions)
        self.avg_strategy = np.zeros(n_actions)
        self.strategy_sum = np.zeros(n_actions)
        self.regret_sum = np.zeros(n_actions)
        self.name = name

    def __repr__(self):
        return self.name

    @staticmethod
    def _uniform():
        """Return the uniform probability vector over all actions."""
        n_actions = Coffee_Strategy.n_actions
        # 1.0 keeps this a true division even without the __future__ import.
        return np.full(n_actions, 1.0 / n_actions)

    def update_strategy(self):
        """
        Set the strategy of choosing a coffee-shop category to be proportional
        to the positive accumulated regrets, then add it to ``strategy_sum``.
        """
        # Negative regrets carry no weight.
        positive_regret = np.maximum(self.regret_sum, 0)

        total = positive_regret.sum()
        if total > 0:
            self.strategy = positive_regret / total
        else:
            # No positive regret: play uniformly to reduce exploitability.
            self.strategy = self._uniform()

        self.strategy_sum += self.strategy

    def regret(self, my_action, opp_action):
        """
        Accumulate the regret of every action against a fixed opponent choice.

        The regret of an action is the utility it would have earned against
        ``opp_action`` minus the utility actually earned with ``my_action``.
        The resulting vector is added to ``regret_sum``.
        """
        utilities = Coffee_Strategy.utilities
        achieved = utilities.loc[my_action, opp_action]
        # Coerce to float so an object-dtype column cannot break the in-place
        # add on the float ``regret_sum`` below.
        alternatives = np.asarray(utilities.loc[:, opp_action].values, dtype=float)
        self.regret_sum += alternatives - achieved

    def action(self, use_avg=False):
        """
        Draw an action at random according to the coffee-category probabilities.

        Uses ``avg_strategy`` when *use_avg* is true, otherwise ``strategy``.
        If the chosen vector has no positive mass (for example it is still all
        zeros because ``update_strategy`` / ``learn_avg_strategy`` has not run
        yet), the draw falls back to the uniform distribution instead of
        letting ``np.random.choice`` raise ``ValueError``.
        """
        probabilities = self.avg_strategy if use_avg else self.strategy
        if not probabilities.sum() > 0:
            probabilities = self._uniform()
        return np.random.choice(Coffee_Strategy.actions, p=probabilities)

    def learn_avg_strategy(self):
        """
        Normalise ``strategy_sum`` into ``avg_strategy``.

        The averaged strategy is the one expected to converge to a Nash
        equilibrium; uniform is used while nothing has been accumulated.
        """
        total = self.strategy_sum.sum()
        if total > 0:
            self.avg_strategy = self.strategy_sum / total
        else:
            self.avg_strategy = self._uniform()
            


## TODO: complete the class below to finish the game and evaluate the Nash equilibrium for each pair of target coffee shop and neighbor.

In [None]:
class Game:
    """Repeated two-player game between a target coffee shop and a neighbor.

    Attributes:
        p1: the 'Target Coffee Shop' player.
        p2: the 'Neighbor Coffee Shop' player.
        max_game: number of rounds played per call to ``play``.
    """

    def __init__(self, max_game=10000):
        """Create both players and remember how many rounds to play."""
        self.p1 = Player('Target Coffee Shop')
        self.p2 = Player('Neighbor Coffee Shop')
        self.max_game = max_game

    def winner(self, a1, a2):
        """
        Return the winner of one round given p1's action *a1* and p2's *a2*.

        A utility of 1 at ``[a1, a2]`` means p1 wins and -1 means p2 wins;
        any other value is reported as the string 'Draw'.
        """
        result = Coffee_Strategy.utilities.loc[a1, a2]
        if result == 1:
            return self.p1
        if result == -1:
            return self.p2
        return 'Draw'

    def _play_regret_matching(self, num_wins):
        """Play ``max_game`` learning rounds, updating regrets and *num_wins*."""
        for _ in range(self.max_game):
            self.p1.update_strategy()
            self.p2.update_strategy()
            a1 = self.p1.action()
            a2 = self.p2.action()
            self.p1.regret(a1, a2)
            # p2 reads the same utility table with the roles swapped.
            # NOTE(review): presumes the table is valid from either side -- confirm.
            self.p2.regret(a2, a1)

            num_wins[self.winner(a1, a2)] += 1

    def _play_avg_regret_matching(self, num_wins):
        """Play ``max_game`` rounds from the average strategies; no learning."""
        for _ in range(self.max_game):
            a1 = self.p1.action(use_avg=True)
            a2 = self.p2.action(use_avg=True)
            num_wins[self.winner(a1, a2)] += 1

    def play(self, avg_regret_matching=False):
        """
        Play ``max_game`` rounds and print the win/draw tally.

        With *avg_regret_matching* false the players learn by regret matching
        each round; with it true they only sample from their average
        strategies (``conclude`` computes those).
        """
        num_wins = {
            self.p1: 0,
            self.p2: 0,
            'Draw': 0,
        }

        if avg_regret_matching:
            self._play_avg_regret_matching(num_wins)
        else:
            self._play_regret_matching(num_wins)

        # Parenthesised form works as a statement on Python 2 and a call on 3.
        print(num_wins)

    def conclude(self):
        """
        Derive each player's average strategy from the strategy statistics
        accumulated so far.
        """
        self.p1.learn_avg_strategy()
        self.p2.learn_avg_strategy()


# Script entry point: learn with regret matching, then replay the game using
# the averaged strategies.
if __name__ == '__main__':
    game = Game()

    print('==== Use simple regret-matching strategy === ')
    game.play()
    print('==== Nash eq === ')
    game.conclude()
    # Kept inside the guard: ``game`` only exists when run as a script.
    game.play(avg_regret_matching=True)