# PettingZoo

In [14]:
import time
import pandas as pd
from pettingzoo.classic import hanabi_v5

# Initialize the global timing table list
global_timing_table = []

# Initialize the environment
env = hanabi_v5.env(colors=5, ranks=5, players=2, hand_size=5, max_information_tokens=8,
                    max_life_tokens=3, observation_type='card_knowledge')

# Conduct the experiment over 100000 iterations
for i in range(100000):

    # Initialize the timing table for this iteration
    timing_table = []

    # Time the reset function
    start_time = time.time()
    env.reset(seed=42)
    reset_time = time.time() - start_time
    timing_table.append({
        "Function": "env.reset()",
        "Action": None,
        "Time (seconds)": reset_time
    })

    # Iterate through agents and time each function call
    for agent in env.agent_iter():
        # Time the env.last() function
        start_time = time.time()
        observation, reward, termination, truncation, info = env.last()
        last_time = time.time() - start_time
        timing_table.append({
            "Function": "env.last()",
            "Action": None,
            "Time (seconds)": last_time
        })

        # Time the env.action_space() function
        start_time = time.time()
        action_space = env.action_space(agent)
        action_space_time = time.time() - start_time
        timing_table.append({
            "Function": "env.action_space()",
            "Action": None,
            "Time (seconds)": action_space_time
        })
    
        # Time the env.observe() function
        start_time = time.time()
        agent_obs = env.observe(agent)
        observe_time = time.time() - start_time
        timing_table.append({
            "Function": "env.observe()",
            "Action": None,
            "Time (seconds)": observe_time
        })

        # Determine action based on termination or truncation
        if termination or truncation:
            action = None
        else:
            mask = observation["action_mask"]
            # Sample an action
            start_time = time.time()
            action = env.action_space(agent).sample(mask)
            sample_time = time.time() - start_time
            
            # Check if action is a dictionary or an integer
            if isinstance(action, dict):
                action_index = action.get('card_index', -1)
            else:
                action_index = int(action)
            
            timing_table.append({
                "Function": "env.action_space(agent).sample()",
                "Action": None,
                "Action Index": action_index,
                "Time (seconds)": sample_time
            })

        # Time the env.step() function with the action
        start_time = time.time()
        env.step(action)
        step_time = time.time() - start_time
        timing_table.append({
            "Function": "env.step()",
            "Action": str(action),
            "Action Index": action_index,
            "Time (seconds)": step_time
        })

    # Close the environment
    start_time = time.time()
    env.close()
    close_time = time.time() - start_time
    timing_table.append({
        "Function": "env.close()",
        "Action": None,
        "Time (seconds)": close_time
    })

    # Append the current iteration's timing table to the global timing table
    global_timing_table.extend(timing_table)

# Convert the global timing table to a Pandas DataFrame
df = pd.DataFrame(global_timing_table)

print(df)

                                 Function Action  Time (seconds)  Action Index
0                             env.reset()   None    3.611803e-03           NaN
1                              env.last()   None    2.021790e-04           NaN
2                      env.action_space()   None    3.099442e-06           NaN
3                           env.observe()   None    7.867813e-06           NaN
4        env.action_space(agent).sample()   None    2.131462e-04          13.0
...                                   ...    ...             ...           ...
6491810                        env.last()   None    1.382828e-05           NaN
6491811                env.action_space()   None    9.536743e-07           NaN
6491812                     env.observe()   None    5.006790e-06           NaN
6491813                        env.step()   None    9.059906e-06           8.0
6491814                       env.close()   None    1.192093e-06           NaN

[6491815 rows x 4 columns]


In [57]:
grouped_df = df.groupby("Function")['Time (seconds)'].agg(['mean', 'std'])
grouped_df['Time ± StdDev'] = grouped_df['mean'].round(6).astype(str) + ' ± ' + grouped_df['std'].round(6).astype(str)

print(grouped_df['Time ± StdDev'])

Function
env.action_space()                        1e-06 ± 1e-06
env.action_space(agent).sample()      4.7e-05 ± 2.8e-05
env.close()                               1e-06 ± 1e-06
env.last()                              1.8e-05 ± 5e-06
env.observe()                             5e-06 ± 2e-06
env.reset()                         0.000828 ± 0.001703
env.step()                          0.000279 ± 0.000137
Name: Time ± StdDev, dtype: object


In [59]:
# Filter out rows where 'Action' is None
action_df = df[df['Action'].notna()]

# Grouping actions into predefined groups
def action_group(card_index):
    if card_index in [0, 1, 2, 3, 4]:
        return 'Group 1'
    elif card_index in [5, 6, 7, 8, 9]:
        return 'Group 2'
    elif card_index in [10, 11, 12, 13, 14]:
        return 'Group 3'
    elif card_index in [15, 16, 17, 18, 19]:
        return 'Group 4'
    else:
        return 'Other'

# Apply the grouping function
action_df['Action Group'] = action_df['Action Index'].apply(action_group)

# Group by 'Action Group' and calculate mean and standard deviation
grouped = action_df.groupby('Action Group')['Time (seconds)'].agg(['mean', 'std'])

# Combine mean and standard deviation into a single column with the ± notation
grouped['Time ± StdDev'] = grouped['mean'].round(6).astype(str) + ' ± ' + grouped['std'].round(6).astype(str)

# Print the results
print(grouped[['Time ± StdDev']])

                    Time ± StdDev
Action Group                     
Group 1         0.00036 ± 8.6e-05
Group 2       0.000209 ± 0.000167
Group 3        0.000319 ± 6.5e-05
Group 4        0.000319 ± 6.1e-05


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  action_df['Action Group'] = action_df['Action Index'].apply(action_group)


# Google DeepMind

In [2]:
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Example code demonstrating the Python Hanabi interface."""

from __future__ import print_function
import numpy as np
import os
import sys
sys.path.append(os.path.abspath('../hanabi_learning_environment'))
import pyhanabi

def run_game(game_parameters):
  """Play a game, selecting random actions."""

  def print_state(state):
    """Print some basic information about the state."""
    print("")
    print("Current player: {}".format(state.cur_player()))
    print(state)

    # Example of more queries to provide more about this state. For
    # example, bots could use these methods to to get information
    # about the state in order to act accordingly.
    print("### Information about the state retrieved separately ###")
    print("### Information tokens: {}".format(state.information_tokens()))
    print("### Life tokens: {}".format(state.life_tokens()))
    print("### Fireworks: {}".format(state.fireworks()))
    print("### Deck size: {}".format(state.deck_size()))
    print("### Discard pile: {}".format(str(state.discard_pile())))
    print("### Player hands: {}".format(str(state.player_hands())))
    print("")

  def print_observation(observation):
    """Print some basic information about an agent observation."""
    print("--- Observation ---")
    print(observation)

    print("### Information about the observation retrieved separately ###")
    print("### Current player, relative to self: {}".format(
        observation.cur_player_offset()))
    print("### Observed hands: {}".format(observation.observed_hands()))
    print("### Card knowledge: {}".format(observation.card_knowledge()))
    print("### Discard pile: {}".format(observation.discard_pile()))
    print("### Fireworks: {}".format(observation.fireworks()))
    print("### Deck size: {}".format(observation.deck_size()))
    move_string = "### Last moves:"
    for move_tuple in observation.last_moves():
      move_string += " {}".format(move_tuple)
    print(move_string)
    print("### Information tokens: {}".format(observation.information_tokens()))
    print("### Life tokens: {}".format(observation.life_tokens()))
    print("### Legal moves: {}".format(observation.legal_moves()))
    print("--- EndObservation ---")

  def print_encoded_observations(encoder, state, num_players):
    print("--- EncodedObservations ---")
    print("Observation encoding shape: {}".format(encoder.shape()))
    print("Current actual player: {}".format(state.cur_player()))
    for i in range(num_players):
      print("Encoded observation for player {}: {}".format(
          i, encoder.encode(state.observation(i))))
    print("--- EndEncodedObservations ---")

  game = pyhanabi.HanabiGame(game_parameters)
  print(game.parameter_string(), end="")
  obs_encoder = pyhanabi.ObservationEncoder(
      game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

  state = game.new_initial_state()
  while not state.is_terminal():
    if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
      state.deal_random_card()
      continue

    print_state(state)

    observation = state.observation(state.cur_player())
    print_observation(observation)
    print_encoded_observations(obs_encoder, state, game.num_players())

    legal_moves = state.legal_moves()
    print("")
    print("Number of legal moves: {}".format(len(legal_moves)))

    move = np.random.choice(legal_moves)
    print("Chose random legal move: {}".format(move))

    state.apply_move(move)

  print("")
  print("Game done. Terminal state:")
  print("")
  print(state)
  print("")
  print("score: {}".format(state.score()))

run_game({"players": 2, "random_start_player": True})

random_start_player=true
seed=-1744314842
max_life_tokens=3
hand_size=5
observation_type=1
max_information_tokens=8
ranks=5
colors=5
players=2

Current player: 1
Life tokens: 3
Info tokens: 8
Fireworks: R0 Y0 G0 W0 B0 
Hands:
R2 || XX|RYGWB12345
R1 || XX|RYGWB12345
B1 || XX|RYGWB12345
G4 || XX|RYGWB12345
G2 || XX|RYGWB12345
-----
Cur player
B1 || XX|RYGWB12345
B2 || XX|RYGWB12345
R4 || XX|RYGWB12345
G5 || XX|RYGWB12345
G1 || XX|RYGWB12345
Deck size: 40
Discards:
### Information about the state retrieved separately ###
### Information tokens: 8
### Life tokens: 3
### Fireworks: [0, 0, 0, 0, 0]
### Deck size: 40
### Discard pile: []
### Player hands: [[R2, R1, B1, G4, G2], [B1, B2, R4, G5, G1]]

--- Observation ---
Life tokens: 3
Info tokens: 8
Fireworks: R0 Y0 G0 W0 B0 
Hands:
Cur player
XX || XX|RYGWB12345
XX || XX|RYGWB12345
XX || XX|RYGWB12345
XX || XX|RYGWB12345
XX || XX|RYGWB12345
-----
R2 || XX|RYGWB12345
R1 || XX|RYGWB12345
B1 || XX|RYGWB12345
G4 || XX|RYGWB12345
G2 || XX|RYGWB12

In [21]:
import time
import pandas as pd
import numpy as np
import sys
import os

sys.path.append(os.path.abspath('../hanabi-learning-environment/hanabi_learning_environment'))
import pyhanabi

# Initialize the global timing table list
global_timing_table = []

# Number of iterations to run
num_iterations = 100000  # Adjust the number of iterations as needed

# Define game parameters
game_parameters = {"players": 2, "random_start_player": True}

# Initialize the game
game = pyhanabi.HanabiGame(game_parameters)

# Initialize observation encoder
obs_encoder = pyhanabi.ObservationEncoder(game, enc_type=pyhanabi.ObservationEncoderType.CANONICAL)

# Conduct the experiment over multiple iterations
for i in range(num_iterations):
    # Initialize the timing table for this iteration
    timing_table = []

    # Initialize the game state
    start_time = time.time()
    state = game.new_initial_state()
    new_state_time = time.time() - start_time
    timing_table.append({
        "Function": "game.new_initial_state()",
        "Action": None,
        "Time (seconds)": new_state_time
    })

    # Loop until the game is terminal
    while not state.is_terminal():
        if state.cur_player() == pyhanabi.CHANCE_PLAYER_ID:
            start_time = time.time()
            state.deal_random_card()
            deal_time = time.time() - start_time
            timing_table.append({
                "Function": "state.deal_random_card()",
                "Action": None,
                "Time (seconds)": deal_time
            })
            continue

        # Time the state.cur_player() function
        start_time = time.time()
        current_player = state.cur_player()
        cur_player_time = time.time() - start_time
        timing_table.append({
            "Function": "state.cur_player()",
            "Action": None,
            "Time (seconds)": cur_player_time
        })

        # Time the observation function
        start_time = time.time()
        observation = state.observation(current_player)
        observation_time = time.time() - start_time
        timing_table.append({
            "Function": "state.observation(agent)",
            "Action": None,
            "Time (seconds)": observation_time
        })

        # Time the encoder function
        start_time = time.time()
        encoded_observation = obs_encoder.encode(state.observation(current_player))
        encode_time = time.time() - start_time
        timing_table.append({
            "Function": "obs_encoder.encode(observation)",
            "Action": None,
            "Time (seconds)": encode_time
        })

        # Time the legal_moves() function
        start_time = time.time()
        legal_moves = state.legal_moves()
        legal_moves_time = time.time() - start_time
        timing_table.append({
            "Function": "state.legal_moves()",
            "Action": None,
            "Time (seconds)": legal_moves_time
        })

        # Time the random action selection
        start_time = time.time()
        move = np.random.choice(legal_moves)
        random_action_time = time.time() - start_time
        timing_table.append({
            "Function": "np.random.choice(legal_moves)",
            "Action": None,
            "Time (seconds)": random_action_time
        })

        # Time the state.apply_move() function
        start_time = time.time()
        state.apply_move(move)
        apply_move_time = time.time() - start_time
        timing_table.append({
            "Function": "state.apply_move(action)",
            "Action": str(move),
            "Time (seconds)": apply_move_time
        })

    # Record the time to check if the game state is terminal
    start_time = time.time()
    is_terminal = state.is_terminal()
    is_terminal_time = time.time() - start_time
    timing_table.append({
        "Function": "state.is_terminal()",
        "Action": None,
        "Time (seconds)": is_terminal_time
    })

    # Record the score at the end of the game
    start_time = time.time()
    score = state.score()
    score_time = time.time() - start_time
    timing_table.append({
        "Function": "state.score()",
        "Action": None,
        "Time (seconds)": score_time
    })

    # Append the current iteration's timing table to the global timing table
    global_timing_table.extend(timing_table)

# Convert the global timing table to a Pandas DataFrame
df = pd.DataFrame(global_timing_table)

print(df)

                              Function    Action  Time (seconds)
0             game.new_initial_state()      None    6.101131e-04
1             state.deal_random_card()      None    1.716614e-05
2             state.deal_random_card()      None    4.291534e-06
3             state.deal_random_card()      None    3.814697e-06
4             state.deal_random_card()      None    4.053116e-06
...                                ...       ...             ...
9582711            state.legal_moves()      None    2.503395e-05
9582712  np.random.choice(legal_moves)      None    3.194809e-05
9582713       state.apply_move(action)  (Play 1)    9.536743e-07
9582714            state.is_terminal()      None    9.536743e-07
9582715                  state.score()      None    9.536743e-07

[9582716 rows x 3 columns]


In [23]:
grouped_df = df.groupby("Function")['Time (seconds)'].agg(['mean', 'std'])
grouped_df['Time ± StdDev'] = grouped_df['mean'].round(10).astype(str) + ' ± ' + grouped_df['std'].round(10).astype(str)

print(grouped_df['Time ± StdDev'])

Function
game.new_initial_state()              6.1413e-06 ± 3.6569e-06
np.random.choice(legal_moves)       3.42243e-05 ± 1.39686e-05
obs_encoder.encode(observation)    0.0001077497 ± 4.33226e-05
state.apply_move(action)               1.271e-06 ± 1.2594e-06
state.cur_player()                      4.107e-07 ± 7.451e-07
state.deal_random_card()              3.3288e-06 ± 3.4364e-06
state.is_terminal()                     5.646e-07 ± 9.546e-07
state.legal_moves()                  2.59297e-05 ± 0.00033594
state.observation(agent)            1.14532e-05 ± 1.43876e-05
state.score()                           6.534e-07 ± 9.097e-07
Name: Time ± StdDev, dtype: object


In [17]:
# Drop entries where 'Action' is None
filtered_df = df.dropna(subset=["Action"])

import re

# Function to categorize actions
def categorize_action(action):
    if re.search(r"\(Discard \d\)", action):
        return "Discard"
    elif re.search(r"\(Play \d\)", action):
        return "Play"
    elif re.search(r"\(Reveal player \+\d color \w\)", action):
        return "Reveal Color"
    elif re.search(r"\(Reveal player \+\d rank \d\)", action):
        return "Reveal Rank"
    else:
        return "Other"

# Apply the categorization to the Action column
filtered_df['Action Category'] = filtered_df['Action'].apply(categorize_action)

# Group by both 'Function' and 'Action Category', then calculate mean and std
grouped_action_df = filtered_df.groupby(["Function", "Action Category"])['Time (seconds)'].agg(['mean', 'std'])

# Create the "Time ± StdDev" column
grouped_action_df['Time ± StdDev'] = grouped_action_df['mean'].round(10).astype(str) + ' ± ' + grouped_action_df['std'].round(10).astype(str)

# Reset index to make the DataFrame easier to read
grouped_action_df = grouped_action_df.reset_index()

# Display the result
print(grouped_action_df[['Action Category', 'Time ± StdDev']])

  Action Category            Time ± StdDev
0         Discard   1.413e-06 ± 1.3608e-06
1            Play  1.4003e-06 ± 1.5403e-06
2    Reveal Color  1.1074e-06 ± 1.2882e-06
3     Reveal Rank  1.0922e-06 ± 1.1744e-06


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Action Category'] = filtered_df['Action'].apply(categorize_action)
