# Fictitious Play vs Fictitious Play

In [None]:
from games.mp import MP
from agents.fictitiousplay import FictitiousPlay
from agents.random_agent import RandomAgent
from auxiliar.repeated_normalform_games_functions import iter_game, plot_rewards, plot_policies

import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Self-play setup: both players are instantiated from the same agent class.
# The alias mapping is handed to iter_game alongside the classes
# (presumably used as display names — confirm in
# auxiliar.repeated_normalform_games_functions).
agent_alias = dict(agent_0='FP-agent_0', agent_1='FP-agent_1')
agent_classes = dict(agent_0=FictitiousPlay, agent_1=FictitiousPlay)

## Matching Pennies

In [None]:
# Define the game
g = MP()

# Define the labels. Raw strings keep the LaTeX backslash in "\pi" from being
# parsed as an (invalid) Python string escape.
action_labels = [r'$\pi(H)$', r'$\pi(T)$']

# Define the experiment parameters
NITS = 5        # Number of iterations
NSTEPS = 1_000  # Number of steps per iteration

# Run the repeated games
iter_game(NITS, NSTEPS, g, agent_classes, action_labels, agent_alias, plot_simplex=True)

## Rock, Paper, Scissors

In [None]:
from games.rps import RPS

In [None]:
# Define the game
g = RPS()

# Define the labels. Raw strings keep the LaTeX backslash in "\pi" from being
# parsed as an (invalid) Python string escape.
action_labels = [r'$\pi(R)$', r'$\pi(P)$', r'$\pi(S)$']

# Define the experiment parameters
NITS = 10        # Number of iterations
NSTEPS = 10_000  # Number of steps per iteration

# Run the repeated games, keeping the returned rewards and policies for later cells
rewards, policies = iter_game(NITS, NSTEPS, g, agent_classes, action_labels, agent_alias)

In [None]:
# Plot both agents' empirical policy trajectories on a single unit square.
# Agent A is read against the bottom/left axes; agent B is drawn mirrored
# (1 - p) so that it is read against the top/right secondary axes.
# Assumes each value in `policies` is a 2-D array whose columns 0 and 1 hold the
# Rock and Paper probabilities (as the axis labels below state) — TODO confirm
# against iter_game.
iter_policy = {agent: agent_policies[:, :] for agent, agent_policies in policies.items()}
N = len(iter_policy['agent_0'])  # length of the first axis; not used by the plot


def _mirror(value):
    """Convert a coordinate between agent A's axes and agent B's mirrored axes."""
    return 1 - value


# Trajectories in plot coordinates.
a_rock = iter_policy['agent_0'][:, 0]
a_paper = iter_policy['agent_0'][:, 1]
b_rock = _mirror(iter_policy['agent_1'][:, 0])
b_paper = _mirror(iter_policy['agent_1'][:, 1])

# Last point of each trajectory, in plot coordinates.
a_end_x, a_end_y = a_rock[-1], a_paper[-1]
b_end_x, b_end_y = b_rock[-1], b_paper[-1]

fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(a_rock, a_paper, label='Agent A', color='tab:blue', alpha=0.5)
ax.plot(b_rock, b_paper, label='Agent B', color='orange', alpha=0.5)
ax.scatter(a_end_x, a_end_y,
           color='tab:blue', marker='*', s=100, label='Converged policy agent A')
ax.scatter(b_end_x, b_end_y,
           color='orange', marker='*', s=100, label='Converged policy agent B')

# Anti-diagonal x + y = 1 of the unit square.
ax.plot([0, 1], [1, 0], color='black', linestyle='--')

# Guide lines projecting each final point perpendicularly onto its own axes.
# Agent A projects onto the bottom (y = 0) and left (x = 0) axes ...
ax.plot([a_end_x, a_end_x], [a_end_y, 0], color='grey', linestyle='--')
ax.plot([0, a_end_x], [a_end_y, a_end_y], color='grey', linestyle='--')
# ... and agent B onto the top (y = 1) and right (x = 1) axes.
ax.plot([b_end_x, b_end_x], [b_end_y, 1], color='grey', linestyle='--')
ax.plot([1, b_end_x], [b_end_y, b_end_y], color='grey', linestyle='--')

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

# Raw strings keep the LaTeX backslash in "\pi" from being parsed as an
# (invalid) Python string escape.
ax.set_xlabel(r"$\pi_{A}$(Rock)", loc='left')
ax.set_ylabel(r"$\pi_{A}$(Paper)", loc='bottom')

# Secondary axes show agent B's probabilities; 1 - x is its own inverse, so
# the same function serves as both the forward and the inverse transform.
ax_top = ax.secondary_xaxis('top', functions=(_mirror, _mirror))
ax_top.set_xlabel(r"$\pi_{B}$(Rock)", loc='right')

ax_right = ax.secondary_yaxis('right', functions=(_mirror, _mirror))
ax_right.set_ylabel(r"$\pi_{B}$(Paper)", loc='top')

ax.legend()
ax.set_title("Empirical Policy Trajectories")
plt.tight_layout()
plt.show()

## Blotto

In [None]:
from games.blotto import Blotto

In [None]:
# Define the game
# NOTE(review): presumably S is the number of soldiers and N the number of
# battlefields — confirm against games.blotto. The trailing values look like
# alternative settings that were tried.
S = 10  # 10
N = 4   # 5
g = Blotto(S=S, N=N)

# Define the labels, one per action of agent_0.
# The raw f-string keeps the LaTeX backslash in "\pi" from being parsed as an
# (invalid) Python string escape. The tripled braces emit a literal "{i}" group
# so a multi-digit index stays wholly inside the subscript, and the closing
# parenthesis sits inside the math delimiters like in the other games' labels.
n_actions = g.action_spaces['agent_0'].n
action_labels = [rf'$\pi(a_{{{i}}}={g._moves[i]})$' for i in range(n_actions)]

# Define the experiment parameters
NITS = 10       # Number of iterations
NSTEPS = 1_000  # Number of steps per iteration

# Run the repeated games
iter_game(NITS, NSTEPS, g, agent_classes, action_labels, agent_alias)