In [1]:
import numpy as np
import random
from gymnasium.spaces import Box, Space
from poke_env.player import Gen9EnvSinglePlayer, RandomPlayer, Player
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.teambuilder import Teambuilder

# from poke_env.player

In [2]:
from poke_env import AccountConfiguration, ServerConfiguration
from poke_env.teambuilder.teambuilder import Teambuilder


class QAgent(Player):
    def __init__(self, account_configuration: AccountConfiguration | None = None, *, avatar: str | None = None, battle_format: str = "gen9randombattle", log_level: int | None = None, max_concurrent_battles: int = 1, accept_open_team_sheet: bool = False, save_replays: bool | str = False, server_configuration: ServerConfiguration | None = None, start_timer_on_battle_start: bool = False, start_listening: bool = True, ping_interval: float | None = 20, ping_timeout: float | None = 20, team: str | Teambuilder | None = None):
        super().__init__(account_configuration, avatar=avatar, battle_format=battle_format, log_level=log_level, max_concurrent_battles=max_concurrent_battles, accept_open_team_sheet=accept_open_team_sheet, save_replays=save_replays, server_configuration=server_configuration, start_timer_on_battle_start=start_timer_on_battle_start, start_listening=start_listening, ping_interval=ping_interval, ping_timeout=ping_timeout, team=team)

        self.q_table = {}
        self.epsilon = 0.9
        self.gamma = 0.9
        self.alpha = 0.1
        self.last_state = None
        self.last_action = None
        self.current_state = None

    def embed_moves(self, battle: AbstractBattle): # 2 to 13
        embedding = []
        for move in battle.available_moves:
            embedding += [move.base_power, move.type, move.category]

        return embedding

    def list_to_tuple(self, embedding):
        return tuple(embedding)

    def embed_pokemon(self, battle: AbstractBattle): # 0 and 1
        embedding = []

        # embedding += battle.active_pokemon.base_stats
        embedding += battle.active_pokemon.current_hp
        embedding += battle.active_pokemon.type

        return embedding

    def embed_battle(self, battle: AbstractBattle):
        """Return a list containing info about the game state"""
        embedding = (battle.opponent_active_pokemon.base_species,)
        # embedding += self.embed_pokemon(battle)
        # embedding += self.embed_moves(battle)
        # embedding += [battle.opponent_active_pokemon]

        return embedding

    def choose_move(self, battle):
        print(self.q_table)
        encoding = self.embed_battle(battle)

        self.last_state = self.current_state
        self.current_state = encoding

        if battle.turn > 1:
            self.update_q_table(battle)

        if encoding in self.q_table:
            if random.random() < self.epsilon:
                best_move = np.argmax(self.q_table[encoding])

                self.last_action = best_move
                return self.select_move(best_move, battle)

        random_move = int(random.random() * len(battle._available_moves))
        self.last_action = random_move

        return self.select_move(random_move, battle)

    def select_move(self, move, battle):
        return self.create_order(battle.available_moves[move])

    def calc_reward(self, battle):
        """
        embedding = self.embed_battle(battle)

        reward = embedding[0] - embedding[-1]"""

        embedding = self.embed_battle(battle)

        return 5

    def update_q_table(self, battle):
        if self.last_state not in self.q_table:
            self.q_table[self.last_state] = [0] * 4

        self.q_table[self.last_state][self.last_action] = self.calc_reward(battle)

        # q_old = self.q_table[self.last_state][self.last_action]
        # q_update = self.alpha

In [3]:
team_1 = """
Goodra (M) @ Assault Vest
Ability: Sap Sipper
EVs: 248 HP / 252 SpA / 8 Spe
Modest Nature
IVs: 0 Atk
- Flamethrower
"""

In [4]:
p1 = QAgent(team=team_1, battle_format="gen9ou")
p2 = RandomPlayer(team=team_1, battle_format="gen9ou")

await p1.battle_against(p2, n_battles=1)

{}
{}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
{('goodra',): [5, 0, 0, 0]}
