In [1]:
import numpy as np
from gymnasium.spaces import Space, Box
from poke_env.player import Gen8EnvSinglePlayer
from poke_env.environment.battle import AbstractBattle
from poke_env.data import GenData
from poke_env.ps_client.account_configuration import AccountConfiguration
import uuid


class SimpleRLPlayer(Gen8EnvSinglePlayer):
    """Gen 8 single-battle environment exposing a 10-component observation.

    The observation produced by ``embed_battle`` is laid out as:

    * indices 0-3: base power of each available move divided by 100
      (``-1`` when the slot has no available move),
    * indices 4-7: type damage multiplier of each available move against the
      opponent's active pokemon (``1`` when unknown or not applicable),
    * index 8: fraction (out of 6) of fainted pokemons in our team,
    * index 9: fraction (out of 6) of fainted pokemons in the opponent team.
    """

    def calc_reward(self, last_battle, current_battle) -> float:
        """Return the reward for ``current_battle``.

        Delegates to ``reward_computing_helper`` with the weights
        ``fainted_value=2.0``, ``hp_value=1.0`` and ``victory_value=30.0``.
        ``last_battle`` is part of the signature but is not used here.
        """
        return self.reward_computing_helper(
            current_battle, fainted_value=2.0, hp_value=1.0, victory_value=30.0
        )

    def embed_battle(self, battle: AbstractBattle):
        """Encode ``battle`` as a float32 vector with 10 components.

        Returns:
            A ``numpy`` array of dtype ``float32`` following the layout
            described in the class docstring.
        """
        # -1 indicates that the move does not have a base power
        # or is not available
        moves_base_power = -np.ones(4)
        moves_dmg_multiplier = np.ones(4)

        # Both lookups are loop-invariant, so resolve them once up front.
        opponent_mon = battle.opponent_active_pokemon
        type_chart = GenData.from_gen(8).type_chart

        for i, move in enumerate(battle.available_moves):
            # Simple rescaling to facilitate learning
            moves_base_power[i] = move.base_power / 100
            # There may be no active opposing pokemon; in that case keep the
            # neutral multiplier of 1 instead of failing on a None attribute
            # access.
            if move.type and opponent_mon is not None:
                moves_dmg_multiplier[i] = move.type.damage_multiplier(
                    opponent_mon.type_1,
                    opponent_mon.type_2,
                    type_chart=type_chart,
                )

        # We count how many pokemons have fainted in each team
        fainted_mon_team = sum(1 for mon in battle.team.values() if mon.fainted) / 6
        fainted_mon_opponent = (
            sum(1 for mon in battle.opponent_team.values() if mon.fainted) / 6
        )

        # Final vector with 10 components
        final_vector = np.concatenate(
            [
                moves_base_power,
                moves_dmg_multiplier,
                [fainted_mon_team, fainted_mon_opponent],
            ]
        )
        return np.float32(final_vector)

    def describe_embedding(self) -> Space:
        """Return the ``Box`` space bounding the vector built by ``embed_battle``.

        Bounds per component group: scaled base powers in ``[-1, 3]``,
        damage multipliers in ``[0, 4]``, fainted fractions in ``[0, 1]``.
        """
        low = [-1, -1, -1, -1, 0, 0, 0, 0, 0, 0]
        high = [3, 3, 3, 3, 4, 4, 4, 4, 1, 1]
        return Box(
            np.array(low, dtype=np.float32),
            np.array(high, dtype=np.float32),
            dtype=np.float32,
        )

In [2]:
def get_unique_account():
    """Create an ``AccountConfiguration`` with a randomly generated username.

    The username is the first 18 hexadecimal characters of a fresh UUID4; it
    is printed together with its length before the configuration is returned.
    The password is a fixed placeholder string.
    """
    # ``.hex`` is the dash-free 32-character form of the UUID.
    username = uuid.uuid4().hex[:18]
    print(f'Generated username: {username} ({len(username)})')

    account = AccountConfiguration(
        username=username,
        password='some-very-secure-password',
    )
    return account

In [6]:
from poke_env.player import RandomPlayer

# Opponent for the environment; it gets its own freshly generated account.
opponent = RandomPlayer(
    account_configuration=get_unique_account(),
    battle_format="gen8randombattle",
)

# Environment under test, also on its own account, playing against `opponent`.
test_env = SimpleRLPlayer(
    account_configuration=get_unique_account(),
    battle_format="gen8randombattle",
    opponent=opponent,
    start_challenging=True,
)

Generated username: b76f4d1d6f2e4123bf (18)
Generated username: 16c3c3e7124d4f7aaf (18)


In [7]:
from poke_env.environment.battle import Battle
from logging import Logger

# Build a bare Gen 8 battle object (placeholder tag, username and logger)
# so the embedding can be inspected without playing a game.
battle = Battle(battle_tag='AAA', username='BBB', logger=Logger('CCC', 0), gen=8)

embedding = test_env.embed_battle(battle)

# Show the embedding and whether it lies inside the declared observation space.
(embedding, test_env.describe_embedding().contains(embedding))

(array([-1., -1., -1., -1.,  1.,  1.,  1.,  1.,  0.,  0.], dtype=float32),
 True)

In [8]:
from gymnasium.utils.env_checker import check_env

# Run gymnasium's environment checker, skipping the render and close checks.
try:
    check_env(test_env, skip_close_check=True, skip_render_check=True)
except KeyboardInterrupt:
    # Manual interruption of the cell is reported instead of raising.
    print("Env check interrupted")
finally:
    # Close the environment whether the check passed, failed or was interrupted.
    test_env.close()

AssertionError: Deterministic step observations are not equivalent for the same seed and action