In [1]:
import asyncio

In [2]:
import numpy as np
import torch

In [3]:
from gym.spaces import Space, Box

In [4]:
from poke_env import AccountConfiguration
from poke_env.environment import AbstractBattle
from poke_env.player import Player
#from poke_env.player import Gen8EnvSinglePlayer

In [5]:
from custom.simple_rl import SimpleRLEnv

In [6]:
# Battle format handed to both the opponent and the training environment.
battle_format = "gen8randombattle"

# Run torch on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [7]:
class MaxDamagePlayer(Player):
    """Player that always attacks with the highest base-power move available.

    Adapted from the poke-env documentation.

    References:
    [1] Creating a simple max damage player.
        https://poke-env.readthedocs.io/en/stable/examples/max_damage_player.html
    """

    def choose_move(self, battle: AbstractBattle):
        """Return the order to play for the current state of ``battle``.

        When at least one move is available, the move with the largest
        ``base_power`` is ordered (the first such move wins ties). Otherwise
        the decision is delegated to ``choose_random_move``.
        """
        # Nothing to attack with: fall back to a random choice (a switch).
        if not battle.available_moves:
            return self.choose_random_move(battle)

        def power_of(candidate):
            # Ranking criterion: the raw base power of the move.
            return candidate.base_power

        strongest = max(battle.available_moves, key=power_of)
        return self.create_order(strongest)

In [8]:
# Scripted opponent for the environment: a MaxDamagePlayer using the shared battle format.
opponent = MaxDamagePlayer(battle_format=battle_format)

In [9]:
# Environment in which the agent account plays against `opponent`.
# NOTE(review): the second AccountConfiguration argument is presumably the
# password (None here) -- confirm against the poke-env API.
simple_env = SimpleRLEnv(
    account_configuration=AccountConfiguration("customTrainbot", None),
    battle_format=battle_format,
    opponent=opponent,
    start_challenging=True,
)

Turn:  1 putting observations in...
Turn:  1 obs put in.
Turn:  1 getting 1 action out to move...
Turn:  1 got an action out 9
Turn:  1 putting observations in...
Turn:  1 obs put in.
Turn:  1 getting 1 action out to move...
Turn:  1 got an action out 5
Turn:  2 putting observations in...
Turn:  2 obs put in.
Turn:  2 getting 1 action out to move...
Turn:  2 got an action out 13
Turn:  3 putting observations in...
Turn:  3 obs put in.
Turn:  3 getting 1 action out to move...
Turn:  3 got an action out 18
Turn:  3 putting observations in...
Turn:  3 obs put in.
Turn:  3 getting 1 action out to move...


In [10]:
# Step through the environment with random actions.
def step():
    """Play one step of ``simple_env`` with a randomly sampled action.

    Samples a single action from ``simple_env.action_space``, applies it with
    ``simple_env.step`` and prints the reward of that transition.

    If the step reports the episode as ``terminated`` or ``truncated``, the
    environment is reset so that the next call to ``step()`` starts from a
    fresh episode instead of stepping a finished one.

    Returns:
        None. The reward is printed rather than returned.
    """
    # Draw exactly one sample; an extra discarded draw would only advance the
    # action-space RNG. int() yields the plain Python integer that step() is
    # given, without routing the value through a torch tensor first.
    action = int(simple_env.action_space.sample())

    _, reward, terminated, truncated, _ = simple_env.step(action)
    print(reward)

    # The Gym API requires reset() before stepping again once an episode ended.
    if terminated or truncated:
        simple_env.reset()
        print("Episode finished; environment reset.")

In [15]:
# Take a single random step; re-run this cell to keep stepping through the battle.
step()

TRAINER: putting action in... For turn 3
TRAINER: action put in. For turn 3
TRAINER: getting observation out... For turn 3
TRAINER: observation got out. For turn 3
TRAINER: observation tracker is at:  3
-2.2851711026615966


In [14]:
# Stop training (ongoing matches will forfeit)
simple_env.close()