In [92]:
import re
from absl import app
from absl import flags
from tqdm import tqdm
import numpy as np

from open_spiel.python import games  # pylint: disable=unused-import
from open_spiel.python import rl_agent
from open_spiel.python import rl_environment
import pyspiel

In [2]:
from open_spiel.python.examples.roshambo_population_example import *

In [3]:
%load_ext autoreload
%autoreload 2

In [109]:
# Number of stage-game repetitions per episode; inserted as `num_repetitions`
# in the repeated-game string used to build the environment.
ROSHAMBO_NUM_THROWS = 100
# Value inserted as the `recall` parameter of the repeated-game string.
ENV_RECALL = 1
# Short text labels for the three action ids.
# NOTE(review): presumably 0/1/2 are Rock/Paper/Scissors in matrix_rps — confirm.
# Not referenced by any other cell visible in this notebook.
ACTIONS = {
    0: "R",
    1: "P",
    2: "S"
}

In [110]:
# Repeated rock-paper-scissors environment: the game string is assembled from
# the configuration constants defined in the notebook.
env = rl_environment.Environment(
    "repeated_game("
    "stage_game=matrix_rps(),"
    f"num_repetitions={ROSHAMBO_NUM_THROWS},"
    f"recall={ENV_RECALL}"
    ")",
    include_full_state=True,
)

# The action count is read from the environment's action spec; the player
# count is fixed at two.
num_actions = env.action_spec()["num_actions"]
num_players = 2

In [111]:
pyspiel.ROSHAMBO_NUM_THROWS

1000

In [112]:
# Load the roshambo bot population exposed by pyspiel and index it by name.
print("Loading population...")
pop_size = pyspiel.ROSHAMBO_NUM_BOTS
print(f"Population size: {pop_size}")
roshambo_bot_names = pyspiel.roshambo_bot_names()
# Sort in place so that a bot's position in the list is deterministic; the
# positions are used as bot ids below and as indices when creating agents.
roshambo_bot_names.sort()
# NOTE(review): this helper is not defined in the notebook; presumably it comes
# from the star import of roshambo_population_example — confirm.
print_roshambo_bot_names_and_ids(roshambo_bot_names)

# Map each bot name to its index in the sorted name list.
bot_id = 0
roshambo_bot_ids = {}
for name in roshambo_bot_names:
    roshambo_bot_ids[name] = bot_id
    bot_id += 1

Loading population...
Population size: 43
Roshambo bot population:
0: actr_lag2_decay
1: adddriftbot2
2: addshiftbot3
3: antiflatbot
4: antirotnbot
5: biopic
6: boom
7: copybot
8: debruijn81
9: driftbot
10: flatbot3
11: foxtrotbot
12: freqbot2
13: granite
14: greenberg
15: halbot
16: inocencio
17: iocainebot
18: marble
19: markov5
20: markovbails
21: mixed_strategy
22: mod1bot
23: multibot
24: peterbot
25: phasenbott
26: pibot
27: piedra
28: predbot
29: r226bot
30: randbot
31: robertot
32: rockbot
33: rotatebot
34: russrocker4
35: shofar
36: sunCrazybot
37: sunNervebot
38: sweetrock
39: switchalot
40: switchbot
41: textbot
42: zq_move


In [132]:
def get_rev_history(history):
    """Return the history with the two entries of every pair exchanged.

    ``history`` is a flat sequence of interleaved entries
    ``[a0, b0, a1, b1, ...]``; the result is a new list
    ``[b0, a0, b1, a1, ...]``. The input is not modified. An odd-length
    history raises ``IndexError`` because its last pair is incomplete.
    """
    return [
        history[pair_start + offset]
        for pair_start in range(0, len(history), 2)
        for offset in (1, 0)
    ]

def game_to_sentence(agents, history, outcome):
    """Turn one finished game into sentences of the RPS language.

    Args:
        agents: list holding the two agent names, first seat first.
        history: flat list of the game's interleaved actions.
        outcome: pair ``(first seat return, second seat return)``.

    Returns:
        A list of sentences, each of the form ``names + actions + [label]``.
        A decisive game yields two sentences: the game as played, labelled
        "W" or "L" from the first seat's point of view, followed by the same
        game with names and action pairs swapped and the opposite label.
        Any other outcome yields a single sentence labelled "Draw".
    """
    first_return, second_return = outcome
    mirrored_history = get_rev_history(history)

    if first_return > second_return:
        label, mirrored_label = "W", "L"
    elif second_return > first_return:
        label, mirrored_label = "L", "W"
    else:
        # Ties are recorded once, from the first seat's perspective only.
        return [agents + history + ["Draw"]]

    as_played = agents + history + [label]
    swapped = [agents[1], agents[0]] + mirrored_history + [mirrored_label]
    return [as_played, swapped]

def history_to_sentences(agents, match_history, outcomes):
    """Flatten a whole match into a single list of sentences.

    Game histories are paired positionally with their outcomes (surplus
    entries on either side are ignored, as with ``zip``). Each pair is
    converted with ``game_to_sentence`` and the per-game results are
    concatenated in order.
    """
    return [
        sentence
        for game_history, outcome in zip(match_history, outcomes)
        for sentence in game_to_sentence(agents, game_history, tuple(outcome))
    ]

def eval_agents(env, agents, num_players, num_episodes):
  """Play `num_episodes` episodes and record returns and joint actions.

  Args:
    env: environment whose `reset()` and `step(actions)` return a time step
      offering `last()` and `rewards`.
    agents: one agent per player; each is stepped with
      `step(time_step, is_evaluation=True)` and must return an object with an
      `action` attribute. Agents that have a `restart` method are restarted
      before every episode.
    num_players: length of the per-episode return vector.
    num_episodes: number of episodes to play.

  Returns:
    A pair `(rewards, history)`. `rewards[k]` is a numpy array holding the
    summed rewards of episode k; `history[k]` is the flat list of actions
    taken in episode k, one entry per agent per step, in agent order.
  """
  all_returns = []
  all_actions = []
  for _ in range(num_episodes):
    # Bots need to be restarted at the start of the episode.
    for player in agents:
      if hasattr(player, "restart"):
        player.restart()

    episode_actions = []
    episode_return = np.zeros(num_players)
    current_step = env.reset()
    while not current_step.last():
      joint_action = [
          player.step(current_step, is_evaluation=True).action
          for player in agents
      ]
      episode_actions.extend(joint_action)
      current_step = env.step(joint_action)
      episode_return += current_step.rewards

    all_returns.append(episode_return)
    all_actions.append(episode_actions)

  return all_returns, all_actions

def get_ev(returns):
    """Average the first entry of every per-episode return vector.

    Only element 0 of each item in ``returns`` is used. An empty ``returns``
    raises ``ZeroDivisionError``.
    """
    first_seat_total = 0
    for episode_return in returns:
        first_seat_total += episode_return[0]
    return first_seat_total / len(returns)
    

def run_tournament(env, agent_names, num_games):
    """Play every unordered pair of bots and collect payoffs and sentences.

    Args:
        env: environment the games are played in; its action spec supplies
            the number of actions handed to the bot factory.
        agent_names: names of the bots entering the tournament. A bot's
            position in this list is its row/column in the payoff matrix.
        num_games: number of episodes played for each pairing.

    Returns:
        A pair ``(F, sentences)``. ``F`` is an ``N x N`` numpy array in which
        ``F[i, j]`` is the mean return of bot ``i`` (seated first) against
        bot ``j`` over ``num_games`` episodes, ``F[j, i]`` is set to
        ``-F[i, j]``, and the diagonal stays 0 because no self-play is run.
        ``sentences`` concatenates the output of ``history_to_sentences`` for
        every pairing, in pairing order.
    """
    n_agents = len(agent_names)
    # Taken from the environment rather than from a notebook-level global so
    # the function does not depend on which cells happened to run before it.
    num_actions = env.action_spec()["num_actions"]
    F = np.zeros((n_agents, n_agents))
    sentences = []
    # NOTE(review): the notebook builds `env` with 100 throws while
    # pyspiel.ROSHAMBO_NUM_THROWS reports 1000 — confirm the bots created
    # below are configured for the environment's episode length.
    for i in tqdm(range(n_agents)):
        # Start at i + 1: each unordered pair is played once, no self-play.
        for j in range(i + 1, n_agents):
            agent_i = agent_names[i]
            agent_j = agent_names[j]
            # Bots are looked up in `agent_names` (not a global name list) so
            # that indices i/j always denote the bots whose names are written
            # into the sentences, even for a subset or reordered population.
            # NOTE(review): assumes the factory resolves the bot as
            # bot_names[idx] — confirm against roshambo_population_example.
            agents = [
                create_roshambo_bot_agent(0, num_actions, agent_names, i),
                create_roshambo_bot_agent(1, num_actions, agent_names, j),
            ]
            eval_returns, history = eval_agents(
                env, agents, len(agents), num_games)
            f_ij = get_ev(eval_returns)
            F[i, j] = f_ij
            F[j, i] = -f_ij
            sentences += history_to_sentences(
                [agent_i, agent_j], history, eval_returns)
    return F, sentences
            

def write_sentences_file(sentences, path):
    """Write the sentences to a text file, one sentence per line.

    Every token is converted with ``str`` and tokens are joined by single
    spaces; each sentence ends with a newline. The file at ``path`` is
    opened in write mode, so existing content is replaced.
    """
    with open(path, "w") as out:
        out.writelines(
            " ".join(map(str, sentence)) + "\n" for sentence in sentences
        )

In [83]:
sentences = history_to_sentences(agent_names, history, avg_eval_returns)

[]
[['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L'], ['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']]
[['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L'], ['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W'], ['rockbot', 'greenberg', 0, 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L'], ['greenberg', 'rockbot', 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']]
[['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L'], ['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W'], ['rockbot', 'greenberg', 0, 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L'], ['greenberg', 'rockbot', 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W'], ['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,

In [85]:
for s in sentences:
    print(str(s))

['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']
['rockbot', 'greenberg', 0, 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['greenberg', 'rockbot', 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']
['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']
['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']
['rockbot', 'greenberg', 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['greenberg', 'rockbot', 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']


In [67]:
# Sample pairing used to try out eval_agents by hand. The indices refer to the
# sorted population listing printed when the bots were loaded:
# 32 is rockbot (seat 0) and 14 is greenberg (seat 1).
agents = [
      create_roshambo_bot_agent(0, num_actions, roshambo_bot_names,
                                32),
      create_roshambo_bot_agent(1, num_actions, roshambo_bot_names,
                                14)
  ]

In [94]:
avg_eval_returns, history = eval_agents(env, agents, num_players, 5)

In [89]:
avg_eval_returns

[array([-9.,  9.]),
 array([-8.,  8.]),
 array([-9.,  9.]),
 array([-9.,  9.]),
 array([-9.,  9.])]

In [88]:
get_ev(avg_eval_returns)

-8.8

In [43]:
agent_names = [roshambo_bot_names[32], roshambo_bot_names[14]]

In [75]:
sentences = history_to_sentences(agent_names, history, avg_eval_returns)

In [95]:
tourney_sentences

[['actr_lag2_decay',
  'actr_lag2_decay',
  1,
  2,
  0,
  0,
  2,
  2,
  0,
  0,
  0,
  2,
  1,
  2,
  1,
  2,
  0,
  2,
  0,
  0,
  0,
  2,
  'Draw'],
 ['actr_lag2_decay',
  'adddriftbot2',
  0,
  2,
  1,
  2,
  1,
  2,
  0,
  0,
  1,
  0,
  1,
  1,
  2,
  2,
  1,
  1,
  1,
  2,
  2,
  0,
  'L'],
 ['adddriftbot2',
  'actr_lag2_decay',
  2,
  0,
  2,
  1,
  2,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  2,
  2,
  1,
  1,
  2,
  1,
  0,
  2,
  'W'],
 ['actr_lag2_decay',
  'addshiftbot3',
  1,
  1,
  2,
  2,
  1,
  2,
  1,
  0,
  1,
  1,
  0,
  2,
  0,
  2,
  1,
  2,
  0,
  0,
  2,
  1,
  'W'],
 ['addshiftbot3',
  'actr_lag2_decay',
  1,
  1,
  2,
  2,
  2,
  1,
  0,
  1,
  1,
  1,
  2,
  0,
  2,
  0,
  2,
  1,
  0,
  0,
  1,
  2,
  'L'],
 ['actr_lag2_decay',
  'antiflatbot',
  2,
  0,
  2,
  1,
  1,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  'W'],
 ['antiflatbot',
  'actr_lag2_decay',
  0,
  2,
  1,
  2,
  1,
  1,
  1,
  2,
  1,
  2,
  1,
  2,
  1,
  2,
  1,


In [117]:
F, tourney_sentences = run_tournament(env, roshambo_bot_names, num_games = 10)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [01:25<00:00,  1.99s/it]


In [136]:
np.save("rps_names.npy", roshambo_bot_names)
np.save("rps_F.npy", F)

In [133]:
write_sentences_file(tourney_sentences, "./rps_tourney.txt")

In [131]:
tourney_sentences[0][:2]

['actr_lag2_decay', 'adddriftbot2']

In [129]:
" ".join([str(s) for s in tourney_sentences[0]])

'actr_lag2_decay adddriftbot2 0 1 0 1 0 1 2 1 2 1 2 1 2 1 2 1 2 0 0 1 1 1 2 2 2 1 0 0 2 0 0 1 2 1 2 0 1 2 1 0 1 1 2 2 2 0 2 2 1 1 1 0 0 0 1 0 1 1 2 2 1 1 1 1 2 1 2 0 1 2 2 2 0 1 1 1 2 2 2 1 2 0 0 1 0 2 2 2 2 2 0 1 1 1 2 2 2 1 2 2 2 2 2 1 1 1 2 1 1 0 0 1 0 1 0 0 2 0 2 2 2 1 2 2 2 1 2 2 0 1 0 1 2 2 2 1 0 0 1 0 2 1 0 0 1 0 2 1 0 0 2 0 2 2 1 2 2 0 2 2 0 1 0 1 0 0 1 2 2 0 0 1 0 2 2 2 2 1 2 0 1 1 0 2 0 2 2 2 0 1 2 0 2 0 2 2 0 1 1 1 L'

In [124]:
print(tourney_sentences[0])

['actr_lag2_decay', 'adddriftbot2', 0, 1, 0, 1, 0, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0, 1, 1, 1, 2, 2, 2, 1, 0, 0, 2, 0, 0, 1, 2, 1, 2, 0, 1, 2, 1, 0, 1, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 0, 1, 2, 2, 2, 0, 1, 1, 1, 2, 2, 2, 1, 2, 0, 0, 1, 0, 2, 2, 2, 2, 2, 0, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 2, 0, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2, 1, 0, 0, 1, 0, 2, 1, 0, 0, 2, 0, 2, 2, 1, 2, 2, 0, 2, 2, 0, 1, 0, 1, 0, 0, 1, 2, 2, 0, 0, 1, 0, 2, 2, 2, 2, 1, 2, 0, 1, 1, 0, 2, 0, 2, 2, 2, 0, 1, 2, 0, 2, 0, 2, 2, 0, 1, 1, 1, 'L']


In [103]:
for game in tourney_sentences:
    print(game)

['actr_lag2_decay', 'adddriftbot2', 0, 2, 1, 2, 1, 0, 0, 2, 0, 2, 1, 0, 0, 2, 0, 1, 1, 1, 2, 2, 'W']
['adddriftbot2', 'actr_lag2_decay', 2, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 1, 2, 0, 1, 0, 1, 1, 2, 2, 'L']
['actr_lag2_decay', 'addshiftbot3', 0, 0, 1, 0, 1, 1, 2, 2, 0, 1, 1, 1, 2, 2, 2, 1, 2, 0, 0, 2, 'W']
['addshiftbot3', 'actr_lag2_decay', 0, 0, 0, 1, 1, 1, 2, 2, 1, 0, 1, 1, 2, 2, 1, 2, 0, 2, 2, 0, 'L']
['actr_lag2_decay', 'antiflatbot', 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 'W']
['antiflatbot', 'actr_lag2_decay', 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 'L']
['actr_lag2_decay', 'antirotnbot', 2, 1, 0, 0, 0, 2, 2, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 2, 'W']
['antirotnbot', 'actr_lag2_decay', 1, 2, 0, 0, 2, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 1, 'L']
['actr_lag2_decay', 'biopic', 2, 0, 2, 0, 2, 0, 1, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 1, 1, 'L']
['biopic', 'actr_lag2_decay', 0, 2, 0, 2, 0, 2, 0, 1, 0, 1, 2, 1, 2, 1, 2, 0, 1, 0, 1, 1, 'W']
['actr

In [119]:
# Rating of each bot: the mean of its row of the payoff matrix F.
# NOTE(review): run_tournament leaves the diagonal at 0 (no self-play), so the
# mean runs over all N columns including the bot's own — confirm that dividing
# by N rather than N - 1 is intended.
ratings = F.mean(axis=1)

In [120]:
# Pair every rating with the bot name at the same index (rating first).
agent_ratings = [(ratings[i], roshambo_bot_names[i]) for i in range(F.shape[0])]

In [121]:
# Order ascending by rating, so the lowest-rated bot comes first.
agent_ratings = sorted(agent_ratings, key=lambda x: x[0])

In [137]:
F.shape

(43, 43)

In [122]:
agent_ratings

[(-60.13255813953489, 'antiflatbot'),
 (-58.03023255813954, 'rockbot'),
 (-51.753488372093024, 'rotatebot'),
 (-38.99767441860465, 'copybot'),
 (-25.690697674418605, 'freqbot2'),
 (-17.43255813953488, 'r226bot'),
 (-11.32093023255814, 'driftbot'),
 (-11.220930232558139, 'sunCrazybot'),
 (-10.80232558139535, 'switchbot'),
 (-6.523255813953488, 'adddriftbot2'),
 (-4.20232558139535, 'switchalot'),
 (-3.1372093023255823, 'peterbot'),
 (-2.569767441860465, 'flatbot3'),
 (-2.3418604651162793, 'addshiftbot3'),
 (-0.9558139534883718, 'textbot'),
 (-0.46279069767441855, 'foxtrotbot'),
 (-0.38604651162790693, 'pibot'),
 (0.07674418604651163, 'randbot'),
 (0.5744186046511636, 'inocencio'),
 (4.420930232558139, 'debruijn81'),
 (6.004651162790699, 'sunNervebot'),
 (7.597674418604651, 'multibot'),
 (7.869767441860464, 'antirotnbot'),
 (9.239534883720934, 'mixed_strategy'),
 (9.346511627906976, 'shofar'),
 (10.030232558139534, 'markovbails'),
 (10.13720930232558, 'piedra'),
 (10.613953488372093, 'mar