In [1]:
import numpy as np

In [2]:
def computeExpectedPayoffPerAction(payoffMatrix, policy):
    return np.sum(payoffMatrix * policy, axis=1)

In [3]:
Payoff_1 = np.array([
    [6, 3],
    [3, 4.5]
])

Payoff_2 = np.array([
    [22, 26],
    [22, 18]
])

In [4]:
policy_1 = np.array([0.5, 0.5])
policy_2 = np.array([1/3, 2/3])

In [5]:
computeExpectedPayoffPerAction(Payoff_1, policy_2)

array([4., 4.])

In [6]:
computeExpectedPayoffPerAction(Payoff_2.T, policy_1)

array([22., 22.])

In [7]:
def computePayoffs(pMatrix_1, pMatrix_2, policy_1, policy_2):
    return (computeExpectedPayoffPerAction(pMatrix_1, policy_2), computeExpectedPayoffPerAction(pMatrix_2.T, policy_1))

computePayoffs(Payoff_1, Payoff_2, policy_1, policy_2)

(array([4., 4.]), array([22., 22.]))

In [8]:
def randomPolicy(n):
    policy = np.random.rand(n)
    return policy / np.sum(policy)

In [9]:
def toPolarCoord(p):
    return np.arctan2(p[1], p[0]) * 180 / np.pi, np.linalg.norm(p)

In [25]:
def normalizePayoffNorm(payoffMatrix, payoff):
    maxPayoff = np.linalg.norm(np.max(payoffMatrix, axis=1))
    minPayoff = np.linalg.norm(np.min(payoffMatrix, axis=1))
    return (payoff - minPayoff) / (maxPayoff - minPayoff) 

In [30]:
def offsetPolicy(policy, scale=0.05):
    scale = 100 if np.random.rand()< 0.1 else scale
    offset = np.random.rand(len(policy)) * scale
    return (policy + offset) / np.sum(policy + offset)

In [12]:
import matplotlib.pyplot as plt
import matplotlib.animation
import numpy as np

In [32]:
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams['figure.dpi'] = 150  
plt.ioff()
fig, ax = plt.subplots()

trajectory = []
bestAnglesOffset = [90, 90]

policy_1 = randomPolicy(2)
policy_2 = randomPolicy(2)

bestP1, bestP2 = None, None

def animate(t):
    global policy_1, policy_2, trajectory, bestAnglesOffset, bestP1, bestP2

    plt.cla()
    new_policy_1 = offsetPolicy(policy_1)
    new_policy_2 = offsetPolicy(policy_2)

    p1_payoff, p2_payoff = computePayoffs(Payoff_1, Payoff_2, new_policy_1, new_policy_2)
    p1, p2 = toPolarCoord(p1_payoff), toPolarCoord(p2_payoff)
    p1 = (p1[0], normalizePayoffNorm(Payoff_1, p1[1]))
    p2 = (p2[0], normalizePayoffNorm(Payoff_2.T, p2[1]))
    if(np.abs(p1[0] - 45) < bestAnglesOffset[0]):
        policy_1 = new_policy_1
        bestAnglesOffset[0] = np.abs(p1[0] - 45)
        bestP1 = p1

    if(np.abs(p2[0] - 45) < bestAnglesOffset[1]):
        policy_2 = new_policy_2
        bestAnglesOffset[1] = np.abs(p2[0] - 45)
        bestP2 = p2
        
    trajectory.append((bestP1, bestP2))
    plt.plot([p[0][0] for p in trajectory], [p[0][1] for p in trajectory], label='Player 1')
    plt.scatter(bestP1[0], bestP1[1], color='blue')
    plt.plot([p[1][0] for p in trajectory], [p[1][1] for p in trajectory], label='Player 2')
    plt.scatter(bestP2[0], bestP2[1], color='orange')
    plt.xlabel('Payoff angle')
    plt.ylabel('Payoff normalized module')
    plt.legend()
    plt.title('Payoff space')
    plt.text(0, 0, f'Player 1 policy: {policy_1} -> {p1_payoff} \nPlayer 2 policy: {policy_2} -> {p2_payoff}', fontsize=8)
    plt.axis([0, 90, 0, 1])

matplotlib.animation.FuncAnimation(fig, animate, frames=100, interval=100)


In [23]:
policy_1 = randomPolicy(2)
policy_2 = randomPolicy(2)
bestAnglesOffset = [90, 90]
for step in range(10):
    new_policy_1 = offsetPolicy(policy_1)
    new_policy_2 = offsetPolicy(policy_2)

    p1, p2 = computePayoffs(Payoff_1, Payoff_2, new_policy_1, new_policy_2)
    p1, p2 = toPolarCoord(p1), toPolarCoord(p2)
    print(p1, p2, bestAnglesOffset)
    if(np.abs(p1[0] - 45) < bestAnglesOffset[0]):
        policy_1 = new_policy_1
        bestAnglesOffset[0] = np.abs(p1[0] - 45)

    if(np.abs(p2[0] - 45) < bestAnglesOffset[1]):
        policy_2 = new_policy_2
        bestAnglesOffset[1] = np.abs(p2[0] - 45)

    print(f'Player 1 policy: {policy_1} Player 2 policy: {policy_2}')

(52.46677770555501, 5.466418040569593) (42.97967795029937, 30.071259946280552) [90, 90]
Player 1 policy: [0.31259352 0.68740648] Player 2 policy: [0.11008618 0.88991382]
(50.587977719079106, 5.504351366004868) (43.369579834733116, 30.263872132204394) [7.46677770555501, 2.0203220497006313]
Player 1 policy: [0.34778137 0.65221863] Player 2 policy: [0.16489072 0.83510928]
(48.91217843102491, 5.5436949641453275) (43.328653728475345, 30.24347284015955) [5.587977719079106, 1.6304201652668837]
Player 1 policy: [0.3440667 0.6559333] Player 2 policy: [0.16489072 0.83510928]
(49.155336869016146, 5.53765702388731) (43.77525058474487, 30.468418499064875) [3.9121784310249126, 1.6304201652668837]
Player 1 policy: [0.3440667 0.6559333] Player 2 policy: [0.20722861 0.79277139]
(47.28609295320183, 5.58701363102998) (43.65320426665082, 30.406429933124777) [3.9121784310249126, 1.2247494152551326]
Player 1 policy: [0.37366329 0.62633671] Player 2 policy: [0.20722861 0.79277139]
(46.794451819715626, 5.6011

In [14]:
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams['figure.dpi'] = 150  
plt.ioff()
fig, ax = plt.subplots()

x= np.linspace(0,10,100)
def animate(t):
    plt.cla()
    plt.plot(x-t,x)
    plt.xlim(0,10)

matplotlib.animation.FuncAnimation(fig, animate, frames=10)