### Testing MC Env Learning in 2 v 2 environment

In [1]:
import numpy as np

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

from vgc.datatypes.Constants import TYPE_CHART_MULTIPLIER, MAX_HIT_POINTS, MOVE_MAX_PP, DEFAULT_TEAM_SIZE
from vgc.datatypes.Objects import PkmMove, Pkm, PkmTeam, GameState, Weather
from vgc.datatypes.Types import PkmStat, PkmType, WeatherCondition, \
    N_TYPES, N_STATUS, N_STATS, N_ENTRY_HAZARD, N_WEATHER, PkmStatus, PkmEntryHazard

In [None]:
'''
need to turn the states into the dict
    want to store counts and running mean of the reward

TEST need to select the initial action
TEST need to then to attack later
need to convert the attack action into the best attack
need to convert the swap action into a swap
need to store the result of the battle

need to add the hiding part

need to test how many battles can get through in how much time

can I get the best dmg from both teams even if the pkm stuff is hidden and not revealed?
    probably yes since passing in the team specific state

can check to see how accurate the attack function is

saving the action dict

possibly store the state dict attack action for non first actions as well


'''

In [2]:
def turn_agent_action_into_env_action(action, agent_game_state):
        '''
        Action values are
        0: select best move
        1: switch to first pkm
        2: switch to second pkm

        Env actions are
        0 to 3: action of active pokm
        4: switch to first pkm
        5: switch to second pkm
        '''
        best_damage = -1
        if action == 0:
            # get best dmg action
            action, best_damage = get_best_active_damage_action(agent_game_state)
        else:
            # switch to first or second pkm if alive
            if action == 1 or action == 2:
                pkm = agent_game_state.teams[0].party[action-1]
                if pkm.fainted() or pkm.hp <= 0.0:
                    action, best_damage = get_best_active_damage_action(agent_game_state)
                else:
                    action = action + 3
            else:
                action, best_damage = get_best_active_damage_action(agent_game_state)

        return action, best_damage


def get_best_active_damage_action(g: GameState):
    '''
    '''
    # Get weather condition
    weather = g.weather.condition

    # Get my Pokémon team
    my_team = g.teams[0]
    my_pkms = [my_team.active]

    # Get opponent's team
    opp_team = g.teams[1]
    opp_active = opp_team.active

    opp_active_type = opp_active.type
    opp_defense_stage = opp_team.stage[PkmStat.DEFENSE]

    # Initialize variables for the best move and its damage
    best_move_id = -1
    best_damage = -np.inf

    # Iterate over all my Pokémon and their moves to find the most damaging move
    for i, pkm in enumerate(my_pkms):
        if i == 0:
            my_attack_stage = my_team.stage[PkmStat.ATTACK]
        else:
            my_attack_stage = 0

        for j, move in enumerate(pkm.moves):
            
            damage = estimate_damage(move.type, pkm.type, move.power, opp_active_type, my_attack_stage,
                                        opp_defense_stage, weather)
            
            # Check if the current move has higher damage than the previous best move
            if damage > best_damage:
                best_move_id = j + i * 4 # think for 2024 j is 0 to 3 for each
                best_damage = damage

    if best_move_id < 0 or best_move_id > 3:
        print(f"Error: best move id {best_move_id} not in expected range")
        best_move_id = 0

    return best_move_id, best_damage


def estimate_damage(move_type: PkmType, pkm_type: PkmType, move_power: float, opp_pkm_type: PkmType,
                    attack_stage: int, defense_stage: int, weather: WeatherCondition) -> float:
        '''
        Not from original code. from updated repo
        '''
        stab = 1.5 if move_type == pkm_type else 1.
        if (move_type == PkmType.WATER and weather == WeatherCondition.RAIN) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.SUNNY):
            weather = 1.5
        elif (move_type == PkmType.WATER and weather == WeatherCondition.SUNNY) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.RAIN):
            weather = .5
        else:
            weather = 1.
        stage_level = attack_stage - defense_stage
        stage = (stage_level + 2.) / 2 if stage_level >= 0. else 2. / (np.abs(stage_level) + 2.)
        damage = TYPE_CHART_MULTIPLIER[move_type][opp_pkm_type] * stab * weather * stage * move_power

        #print(damage, move_type, pkm_type, move_power, opp_pkm_type, attack_stage, defense_stage, weather)
        return damage

In [None]:
def get_running_mean(old_mean, count, new_value):
    '''
    '''
    STOPPED HERE TO NEED TO VERIFY THIS
    #not sure if count is old count or new count
    #https://stackoverflow.com/questions/12636613/how-to-calculate-moving-average-without-keeping-the-count-and-data-total
    # return old_mean + (new_value - old_mean) / count

def turn_game_state_into_dict(action_dict, game_state_agent, game_state_opp, agent_first_move):
    '''
    '''
    pass

def add_results_to_action_dict(action_dict, state_key, agent_first_move, agent_reward):
    '''
    '''
    if state_key in action_dict:
        if agent_first_move in action_dict[state_key]:
            action_dict[state_key][agent_first_move]["count"] += 1
            action_dict[state_key][agent_first_move]["reward"] = get_running_mean(action_dict[state_key][agent_first_move]["reward"],
                                                                                  action_dict[state_key][agent_first_move]["count"], agent_reward)
        action_dict[state_key]["count"] += 1
        action_dict[state_key]["reward"] += agent_reward

In [None]:
num_battles = 1

team_generator = RandomTeamGenerator(2)

winner_dict = {}

action_dict = {}

max_episode_steps = 250


for battle_idx in range(num_battles):
    
    agent_team = team_generator.get_team().get_battle_team([0, 1, ])
    opp_team = team_generator.get_team().get_battle_team([0, 1, ])

    env = PkmBattleEnv((agent_team, opp_team),
                   encode=(True, False))  # set new environment with teams

    game_state, info = env.reset()

    is_first_move = True
    agent_first_move = None

    for episode_step in range(max_episode_steps):
        if is_first_move:
            if np.random.rand() < 0.5:
                agent_action = 0
                agent_first_move = 'attack'
            else:
                agent_action = 1
                agent_first_move = 'swap'
            is_first_move = False
        else:
            agent_action = 0

        agent_env_action, agent_best_damage = turn_agent_action_into_env_action(agent_action, game_state[0])
        opp_action, opp_best_damage = get_best_active_damage_action(game_state[1])

        if agent_action == 1 and agent_env_action != 4:
            print("Error agent action is 1 but env action is not 4 ")
        elif agent_action == 0:
            if (agent_env_action < 0 or agent_env_action > 3):
                print("Error agent action is 0 but env action is not 0 to 3 ")
            if agent_best_damage < 0:
                print("Error agent action is 0 but best damage is negative")

        if opp_action < 0 or opp_action > 3:
            print("Error opp action is not 0 to 3")
        
        if opp_best_damage < 0:
            print("Error opp best damage is negative")
        
        action_list = [agent_action, opp_action]
        game_state, reward, terminated, truncated, info = env.step(action_list)  # for inference, we don't need reward

        if episode_step == max_episode_steps - 1:
            print('Warning: max steps reached')
            terminated = True

        if terminated:
            if env.env.winner in winner_dict:
                winner_dict[env.env.winner] += 1
            else:
                winner_dict[env.env.winner] = 1
            break

print(winner_dict)


### Test new env

In [1]:
import numpy as np

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.behaviour.BattlePolicies import RandomPlayer#, TerminalPlay

from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

import importlib
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

#### testing init and reset

In [47]:
importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

env = PkmBattleEnvWrapper("my_id", is_debug_mode=True)

print(env.action_space)
print(env.observation_space)
obs, info = env.reset()
print(obs['state'].shape)




Discrete(3)
Dict('state': Box(-1.0, 1.0, (36,), float32))
(36,)


In [None]:
importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper
env = PkmBattleEnvWrapper("my_id", is_debug_mode=True, obs_type='simple')
obs, info = env.reset()
print(obs['state'].shape)

In [15]:
importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

env = PkmBattleEnvWrapper("my_id", is_debug_mode=True)

# def __init__(self, id: str, obs_type: str = 'medium',  gym_space_type: str = 'Dict',
#              is_debug_mode: bool = False):

obs, info = env.reset()

def display_medium_obs(obs):

    print(obs)
    print(obs['state'].shape)
    print("surviving, hp, revealed self")
    # surviving, hp, revealed self
    current_index = 0
    current_index += 9
    print(obs['state'][:current_index])
    # opp
    print(obs['state'][current_index:current_index+9])
    current_index += 9

    # nibot dmg team vs. active
    print("nibot obs active")
    print(obs['state'][current_index:current_index+3])
    print(obs['state'][current_index:current_index+3]*480*2)
    # # nibot action chosen
    # print(obs['state'][30:36])
    current_index += 3

    print("nibot obs party")
    print(obs['state'][current_index:current_index+6]) 
    print(obs['state'][current_index:current_index+6]*480*2)
    current_index = current_index+6  

    print("type values")
    print(obs['state'][current_index:current_index+9])
    print(obs['state'][current_index:current_index+9]*2)
    current_index += 9
    #print(current_index)

    # print("move values self")
    # print(obs['state'][current_index:current_index+24])
    # current_index += 24

    # print("move values opp")
    # print(obs['state'][current_index:current_index+24])
    # print(current_index + 24)

    print(current_index)

display_medium_obs(obs)

# env = PkmBattleEnvWrapper("my_id", obs_type='simple', is_debug_mode=True)
# obs, info = env.reset()
# print(obs['state'].shape)

# env = PkmBattleEnvWrapper("my_id", obs_type='full', is_debug_mode=True)
# obs, info = env.reset()q
# print(obs['state'].shape)

{'state': array([ 1.        ,  0.25      ,  1.        ,  1.        ,  0.925     ,
        0.        ,  1.        ,  0.25      ,  0.        ,  1.        ,
        0.475     ,  1.        ,  1.        , -0.5       ,  0.        ,
        1.        , -0.5       ,  0.        ,  0.35897437,  0.05641026,
        0.21025641, -0.5       , -0.5       , -0.5       , -0.5       ,
       -0.5       , -0.5       ,  0.5       , -0.5       , -0.5       ,
        0.5       , -0.5       , -0.5       ,  0.5       , -0.5       ,
       -0.5       ], dtype=float32)}
(36,)
surviving, hp, revealed self
[1.    0.25  1.    1.    0.925 0.    1.    0.25  0.   ]
[ 1.     0.475  1.     1.    -0.5    0.     1.    -0.5    0.   ]
nibot obs active
[0.35897437 0.05641026 0.21025641]
[344.6154    54.153847 201.84616 ]
nibot obs party
[-0.5 -0.5 -0.5 -0.5 -0.5 -0.5]
[-480. -480. -480. -480. -480. -480.]
type values
[ 0.5 -0.5 -0.5  0.5 -0.5 -0.5  0.5 -0.5 -0.5]
[ 1. -1. -1.  1. -1. -1.  1. -1. -1.]
36


In [16]:
1.21875 * 2.0 * 480

1170.0

In [17]:
1 * 480 / 1170

0.41025641025641024

In [18]:
# (1170 / 480) / x = 1
# x = 1170 / 480
# 480x = 1170 
1170/480

2.4375

In [20]:
importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

env = PkmBattleEnvWrapper("my_id", is_debug_mode=True)

max_list =[]
for i in range(10):
    obs, info = env.reset()
    max_list.append(np.max(obs['state'][18:30]))
    #print(np.max(obs['state'][18:30]), np.max(obs['state'][18:30])*480*2)

print(np.round(np.max(max_list),3), np.round(np.median(max_list),3), np.round(np.mean(max_list),3), np.round(np.min(max_list),3))

1.0 0.5 0.55 0.25


In [21]:
# obs['state'][69:83]

array([], dtype=float32)

In [69]:
# env.env.game_state_view[0].teams[0].active.max_hp
# dir(env.env)

#### Testing many env runs

In [56]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper, SimpleBot

n_battles = 100 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
max_steps_list = []
t = False
battle = 0
agent_random = RandomPlayer()
# do this if want agent to swap back and forth
#env = PkmBattleEnvWrapper("asdf", is_debug_mode=True)

while battle < n_battles:
    
    # agent is always index 0
    env = PkmBattleEnvWrapper("asdf", is_debug_mode=True)
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        #a = 0
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
        if t:
            max_steps_list.append(env.current_episode_steps)
    t = False
    battle += 1
    if env.env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(np.mean(max_steps_list), np.median(max_steps_list), np.max(max_steps_list))
print(winner_dict)
# print(active_dict)

5.39 5.0 9
{0: 30, 1: 70}


In [86]:
display_medium_obs(s)

{'state': array([ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  1.        ,  1.        ,
        0.4       ,  1.        ,  1.        , -0.5       ,  0.        ,
        1.        , -0.5       ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.5       , -0.5       , -0.5       , -0.5       ,
       -0.5       , -0.5       , -0.5       , -0.5       , -0.5       ,
       -0.5       , -0.5       , -0.5       , -0.5       , -0.5       ,
       -0.5       , -0.5       , -0.5       , -0.5       , -0.5       ,
       -0.5       , -0.5       , -0.5       , -0.5       , -0.5       ,
        1.        , -0.5       , -0.5       ,  0.25      , -0.5       ,
       -0.5       ,  0.25      , -0.5       , -0.5    

##### testing simple env runs

In [108]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

n_battles = 1000 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
max_steps_list = []
t = False
battle = 0
agent_random = RandomPlayer()
# do this if want agent to swap back and forth
env = PkmBattleEnvWrapper("asdf", is_debug_mode=True, obs_type='simple')

while battle < n_battles:
    
    # agent is always index 0
    #env = PkmBattleEnvWrapper("asdf", is_debug_mode=True)
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
        if t:
            max_steps_list.append(env.current_episode_steps)
    t = False
    battle += 1
    if env.env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(np.mean(max_steps_list), np.median(max_steps_list), np.max(max_steps_list))
print(winner_dict)
# print(active_dict)

5.881 6.0 12
{1: 487, 0: 513}


In [109]:
display_medium_obs(s)

{'state': array([0.     , 0.     , 1.     , 0.     , 0.     , 1.     , 0.     ,
       0.     , 1.     , 1.     , 0.4875 , 1.     , 1.     , 0.05625,
       1.     , 1.     , 0.2625 , 1.     , 0.     , 0.     , 0.     ,
       0.     , 0.     , 0.     , 0.     , 0.     , 0.     , 0.     ,
       0.     , 0.     , 1.     , 0.     , 0.     , 0.     , 0.     ,
       0.     ], dtype=float32)}
(36,)
surviving, hp, revealed self
[0. 0. 1. 0. 0. 1. 0. 0. 1.]
[1.      0.4875  1.      1.      0.05625 1.      1.      0.2625  1.     ]
nibot obs active
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 0. 0. 0. 0. 0.]
nibot obs party
[]
[]
[]
[]
type values
[]
[]
move values self
[]
move values opp
[]


##### Testing a simple bot

In [135]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper, SimpleBot

from vgc.engine.PkmBattleEnv import PkmBattleEnv

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator


n_battles = 500 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
max_steps_list = []
t = False
battle = 0
agent_simple = SimpleBot() #RandomPlayer() #SimpleBot()#
agent_nibot = NiBot() #RandomPlayer() #NiBot()
# do this if want agent to swap back and forth
#env = PkmBattleEnvWrapper("asdf", is_debug_mode=True, obs_type='simple')



team_generator = RandomTeamGenerator(2)




while battle < n_battles:
    
    # agent is always index 0
    #env = PkmBattleEnvWrapper("asdf", is_debug_mode=True)
    
    team0 = team_generator.get_team().get_battle_team([0, 1, 2])
    team1 = team_generator.get_team().get_battle_team([0, 1, 2])
    env = PkmBattleEnv((team0, team1),
                   encode=(False, False))  # set new environment with teams
    
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = [agent_simple.get_action(s[0]), agent_nibot.get_action(s[1])]
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
        # if t:
        #     max_steps_list.append(env.current_episode_steps)
    t = False
    battle += 1
    if env.winner in winner_dict:
        winner_dict[env.winner] += 1
    else:
        winner_dict[env.winner] = 1
    #print(env.winner)  # winner id number
#print(np.mean(max_steps_list), np.median(max_steps_list), np.max(max_steps_list))
print(np.round(winner_dict[0]/n_battles,3), np.round(winner_dict[1]/n_battles,3))
# print(active_dict)

0.726 0.274


In [73]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

n_battles = 1 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
t = False
battle = 0
agent_random = RandomPlayer()
env = PkmBattleEnvWrapper("asdf", is_debug_mode=True)

while battle < n_battles:
    
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
    t = False
    battle += 1
    if env.env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(winner_dict)
# print(active_dict)

{1: 1}


In [78]:
display_medium_obs(s)

{'state': array([ 0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  1.        ,  1.        ,
        0.125     ,  1.        ,  1.        ,  0.25      ,  1.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        1.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.5       ,  0.5       ,  1.        ,  0.5       ,  0.5       ,
        0.25      ,  0.5       ,  0.5       ,  1.     

## Old scrap

In [3]:
import numpy as np
a = [9,1,2]
# np.array(a, dtype=np.float32)
b = [1,312.,2]
a.extend(b)
print(a)

[9, 1, 2, 1, 312.0, 2]


### Random move generating logic

In [25]:
#https://gitlab.com/DracoStriker/pokemon-vgc-engine/-/blob/master/vgc/util/generator/PkmTeamGenerators.py#L42

def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x) / np.sum(np.exp(x), axis=0)

base_stats = np.array([120., 30., 30., 30., 30.])
evs = np.random.multinomial(10, softmax(np.random.normal(0, 1, 5)), size=None) * 36 + base_stats

print(evs)

for i in range(4):
    print(evs[i + 1])

[300.  30.  30. 174.  66.]
30.0
30.0
174.0
66.0


In [31]:
softmax(np.random.normal(0, 1, 5))

array([0.05899407, 0.80928058, 0.04238171, 0.0454171 , 0.04392655])

In [26]:
np.random.multinomial(10, softmax(np.random.normal(0, 1, 5)), size=None)

array([2, 0, 2, 1, 5])

In [41]:
max_list = []
for i in range(10000):
    base_stats = np.array([120., 30., 30., 30., 30.])
    evs = np.random.multinomial(10, softmax(np.random.normal(0, 1, 5)), size=None) * 36 + base_stats
    max_list.append(evs[1:])

print(np.max(max_list), np.mean(max_list), np.median(max_list))

390.0 102.0108 66.0


In [48]:
def one_hot(p, n):
    b = [0] * n
    b[p] = 1
    return b

one_hot(team0.active.type, 18)


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

In [52]:
team0.active.moves[1].type == team0.active.type

False

In [49]:
team0.active.type

<PkmType.BUG: 11>

In [None]:
team

In [45]:
dir(team0.active.type)

['__class__',
 '__doc__',
 '__module__',
 'as_integer_ratio',
 'bit_length',
 'conjugate',
 'denominator',
 'from_bytes',
 'imag',
 'name',
 'numerator',
 'real',
 'to_bytes',
 'value']

### Testing random teams moves
* looks as expected
* only thing differing is the move power and type. type for move 0 matches the pkm type

In [5]:
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

team_generator = RandomTeamGenerator(2)

team0 = team_generator.get_team().get_battle_team([0, 1, 2])
team1 = team_generator.get_team().get_battle_team([0, 1, 2])

In [6]:

for i in range(10):
    print(f"--- {i}")
    team0 = team_generator.get_team().get_battle_team([0, 1, 2])
    print(team0.active.type)
    for i in range(4):
        print(team0.active.moves[i].acc, team0.active.moves[i].power, team0.active.moves[i].type, team0.active.moves[i].priority, team0.active.moves[i].target,
            team0.active.moves[i].status, team0.active.moves[i].weather,  team0.active.moves[i].pp, team0.active.moves[i].name)
    print("__________")

--- 0
PkmType.STEEL
1.0 210.0 PkmType.STEEL False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 102.0 PkmType.GROUND False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 66.0 PkmType.DARK False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 30.0 PkmType.ICE False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
__________
--- 1
PkmType.GROUND
1.0 30.0 PkmType.GROUND False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 30.0 PkmType.ELECTRIC False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 30.0 PkmType.GHOST False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 282.0 PkmType.STEEL False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
__________
--- 2
PkmType.PSYCHIC
1.0 66.0 PkmType.PSYCHIC False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 66.0 PkmType.FLYING False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 102.0 PkmType.GRASS False 1 PkmStatus.NONE WeatherCondition.CLEAR 10 None
1.0 210.0 PkmType.ICE False 1 PkmStatus.NONE WeatherConditi

In [21]:
team0.active.fainted()

False

In [18]:
team0.entry_hazard

[0, 0]

In [20]:
float(team0.confused)

1.0

In [12]:
team0.active.revealed

False

In [15]:
print(team0.active.revealed, team0.active.fainted())
print(team0.party[0].revealed, team0.party[0].fainted())

False False
False False


In [8]:
team0.active.fainted()

False

In [10]:
asdf = [team0.active] + team0.party
print(asdf, len(asdf))

[<vgc.datatypes.Objects.Pkm object at 0x00000230FE72FF10>, <vgc.datatypes.Objects.Pkm object at 0x00000230FE69C790>, <vgc.datatypes.Objects.Pkm object at 0x00000230FE7A3040>] 3


In [24]:
dir(team0.active.moves[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'acc',
 'effect',
 'fixed_damage',
 'hazard',
 'hide',
 'max_pp',
 'move_id',
 'name',
 'owner',
 'power',
 'pp',
 'priority',
 'prob',
 'public',
 'recover',
 'reset',
 'reveal',
 'revealed',
 'set_owner',
 'stage',
 'stat',
 'status',
 'target',
 'type',
 'weather']

In [11]:
dir(team0)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'active',
 'confused',
 'entry_hazard',
 'fainted',
 'get_not_fainted',
 'get_pkm_list',
 'n_turns_confused',
 'party',
 'reset',
 'reset_team_members',
 'size',
 'stage',
 'switch']

### Testing gym make

In [2]:
#import gym_examples
#env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
#env = gymnasium.make('gym_examples/GridWorld-v0')

# team0, team1 = PkmTeam(), PkmTeam()
# agent0, agent1 = RandomPlayer(), RandomPlayer()
# env = PkmBattleEnv((team0, team1),
#                    encode=(True, False))  # set new environment with teams


import gymnasium as gym

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv

env = gym.make('PkmBattleEnv-v0', teams=(PkmTeam(), PkmTeam()))

obs, info = env.reset()
print(obs)
print(info)



([False, 0, 0, 0.0, 0.0, 0.0, 1.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0.125, 1.0, 0.5, False, 0.0, 1, 0.0, 0, 0.0, 0.0, 1, 0, 0, 0, 0, 0, 0,

  logger.deprecation(
  logger.deprecation(
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")


In [3]:
env.observation_space

Discrete(1188)

In [55]:

# testing cleanrl make_env

import importlib
import pkm_battle_env_wrapper_cleanrl

importlib.reload(pkm_battle_env_wrapper_cleanrl)
from pkm_battle_env_wrapper_cleanrl import NiBotCleanRL, PkmBattleEnvWrapperCleanRL
import numpy as np

def make_env(env_id, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id, teams=(PkmTeam(), PkmTeam()))

        if env_id == 'PkmBattleEnv-v0':
            env = PkmBattleEnvWrapperCleanRL(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

test_env = make_env('PkmBattleEnv-v0', 0, False, 'test')
obs, info = test_env().reset()
print(obs, info)
print(type(obs), obs.shape)

asdf
[0. 0. 0. ... 0. 0. 0.] {}
<class 'numpy.ndarray'> (1188,)


In [53]:
# testing cleanrly sycn vector env
# I think the shape part for action space is only used in continuous PPO and not discrete PPO

import torch

env_id = 'PkmBattleEnv-v0'
capture_video = False
run_name = 'test'
num_envs = 16

envs = gym.vector.SyncVectorEnv(
    [make_env(env_id, i, capture_video, run_name) for i in range(num_envs)],
)
assert isinstance(envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
print(envs.single_action_space.n)
print(envs.single_action_space.shape)
print(envs.single_observation_space.shape)

next_obs, _ = envs.reset(seed=5)
print(next_obs)

device = 'cpu'
actions = torch.zeros((10, num_envs) + envs.single_action_space.shape).to(device)
action = actions[0]


6
()
(1188,)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


  logger.warn(


In [54]:
# print(action.cpu().numpy(), action.shape)
# envs.step(action.cpu().numpy())
# next_obs, reward, terminations, truncations, infos = envs.step(action.cpu().numpy())

In [45]:
# next_obs[0].tolist()

In [18]:
print(np.array(envs.single_observation_space.shape).prod())

1.0


In [24]:
def make_env_cartpole(env_id, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        return env

    return thunk

#cartpole_env = gym.make('CartPole-v0')
cartpole_envs = gym.vector.SyncVectorEnv(
    [make_env_cartpole('CartPole-v0', i, capture_video, run_name) for i in range(num_envs)],
)
assert isinstance(cartpole_envs.single_action_space, gym.spaces.Discrete), "only discrete action space is supported"
print(cartpole_envs.single_action_space.n)
print(cartpole_envs.single_action_space.shape)
print(cartpole_envs.single_observation_space.shape)

2
()
(4,)


In [31]:
# testing something with torch
import torch
actions = torch.zeros((10, num_envs) + cartpole_envs.single_action_space.shape)
print(actions.shape)
b_actions = actions.reshape((-1,) + envs.single_action_space.shape)
print(b_actions.shape)
actions = torch.zeros((10, num_envs))
print(actions.shape)
print(b_actions.shape)

torch.Size([10, 16])
torch.Size([160])
torch.Size([10, 16])
torch.Size([160])


In [19]:
envs.single_observation_space

Discrete(1188)

In [14]:
dir(envs.single_action_space)

['__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_is_protocol',
 '_np_random',
 '_shape',
 'contains',
 'dtype',
 'from_jsonable',
 'is_np_flattenable',
 'n',
 'np_random',
 'sample',
 'seed',
 'shape',
 'start',
 'to_jsonable']

In [3]:
import os
os.getcwd()

'c:\\Users\\james\\github_repos\\pokemon-vgc-engine\\s3stuff'

In [4]:
import importlib
import pkm_battle_env_wrapper_cleanrl

importlib.reload(pkm_battle_env_wrapper_cleanrl)
from pkm_battle_env_wrapper_cleanrl import NiBotCleanRL, PkmBattleEnvWrapperCleanRL
import numpy as np


### setting max number of env steps

In [22]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper
import numpy as np

n_battles = 10 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
max_steps_list = []
t = False
battle = 0
agent_random = RandomPlayer()
env = PkmBattleEnvWrapper("asdf")

while battle < n_battles:
    
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        s, reward, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
        if t:
            max_steps_list.append(env.current_episode_steps)
            print(reward, t, env.current_episode_steps)
    t = False
    battle += 1
    if env.env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(np.mean(max_steps_list), np.median(max_steps_list), np.max(max_steps_list))
print(winner_dict)
# print(active_dict)

1.0 True 5
-1.0 True 4
-1.0 True 6
1.0 True 7
0.0 True 7
0.0 True 7
-1.0 True 6
-1.0 True 7
0.0 True 7
1.0 True 6
6.2 6.5 7
{0: 3, 1: 4, -1: 3}


### Test that env runs a bunch of times and no obvious bugs in env

In [2]:
from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.behaviour.BattlePolicies import RandomPlayer#, TerminalPlay

from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

import importlib
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot

In [13]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

n_battles = 1000 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
max_steps_list = []
t = False
battle = 0
agent_random = RandomPlayer()
env = PkmBattleEnvWrapper("asdf")

while battle < n_battles:
    
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
        if t:
            max_steps_list.append(env.current_episode_steps)
    t = False
    battle += 1
    if env.env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(np.mean(max_steps_list), np.median(max_steps_list), np.max(max_steps_list))
print(winner_dict)
# print(active_dict)

NameError: name 'np' is not defined

In [4]:
s

{'state': array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)}

In [7]:
# testing my env
# PkmBattleEnvWrapper

from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot, PkmBattleEnvWrapper

n_battles = 1000 # total number of battles
#active_dict = {'active':{}, 'opp':{}}
winner_dict = {}
t = False
battle = 0
agent_random = RandomPlayer()
env = PkmBattleEnvWrapper("asdf")

while battle < n_battles:
    
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        # a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        # # if a[0] < 0 or a[0] > 4:
        # #     print('ni action is ', a[0])
        # # print(a[0])
        # if a[0] in active_dict['active']:
        #     active_dict['active'][a[0]] += 1
        # else:
        #     active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        a = agent_random.get_action(s)
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
    t = False
    battle += 1
    if env.winner in winner_dict:
        winner_dict[env.env.winner] += 1
    else:
        winner_dict[env.env.winner] = 1
    #print(env.winner)  # winner id number
print(winner_dict)
# print(active_dict)

{1: 840, 0: 160}


### Test an advanced Opponent set up

In [5]:
from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.behaviour.BattlePolicies import RandomPlayer#, TerminalPlay

from pkm_battle_env_wrapper import NiBot

import importlib
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot

In [6]:
team0, team1 = PkmTeam(), PkmTeam()
agent0, agent1 = RandomPlayer(), RandomPlayer()
env = PkmBattleEnv((team0, team1),
                   encode=(True, False))  # set new environment with teams

obs, info = env.reset()  # reset environment and get initial observation

In [7]:
print(len(obs))
print(obs[1])

2
<PkmBattleEnv instance>


In [8]:
# how it works for random agent
agent1.get_action(obs[1])

0

In [9]:
test_nibot = NiBot()
for i in range(10):
    #test_nibot.get_action(obs[1])
    print(test_nibot.get_action(obs[1]))

45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
0 Move 3 0 Damage 45.0 Poke Pkm(Type=NORMAL, HP=240, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), })
Pokemon 0 Pkm(Type=NORMAL, HP=240, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), PkmMove(Power=30, Acc=1.0, PP=10, Type=NORMAL), }) Move 0 Damage 45.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.NORMAL 0 0 1.0
45.0 PkmType.NORMAL PkmType.NORMAL 30.0 PkmType.

#### Testing nibot

In [26]:
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator
import pkm_battle_env_wrapper

importlib.reload(pkm_battle_env_wrapper)
from pkm_battle_env_wrapper import NiBot

n_battles = 1 # total number of battles
active_dict = {'active':{}, 'opp':{}}
t = False
battle = 0
while battle < n_battles:
    team_generator = RandomTeamGenerator(2)
    team0 = team_generator.get_team().get_battle_team([0, 1, 2])
    team1 = team_generator.get_team().get_battle_team([0, 1, 2])
    # print(team0, team1)
    #team0, team1 = PkmTeam(), PkmTeam()
    agent0, agent1 = NiBot(), RandomPlayer() #NiBot() #RandomPlayer()
    env = PkmBattleEnv((team0, team1),
                    encode=(agent0.requires_encode(), agent1.requires_encode()))  # set new environment with teams
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        if a[0] < 0 or a[0] > 4:
            print('ni action is ', a[0])
        print(a[0])
        if a[0] in active_dict['active']:
            active_dict['active'][a[0]] += 1
        else:
            active_dict['active'][a[0]] = 1
        #print('ni action is ', a[0], a[1])
        # if a[1] < 0 or a[1] >= 0:
        #     print('nibot action is ', a[1])
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
    t = False
    battle += 1
    print(env.winner)  # winner id number

print(active_dict)

pkm moves is  [<vgc.datatypes.Objects.PkmMove object at 0x00000192B7363B80>, <vgc.datatypes.Objects.PkmMove object at 0x00000192B74D97C0>, <vgc.datatypes.Objects.PkmMove object at 0x00000192B74D9160>, <vgc.datatypes.Objects.PkmMove object at 0x00000192B74D9730>]
0 Move 0 0 Damage 45.0 Poke Pkm(Type=FAIRY, HP=228, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=FAIRY), PkmMove(Power=30, Acc=1.0, PP=10, Type=ELECTRIC), PkmMove(Power=174, Acc=1.0, PP=10, Type=GROUND), PkmMove(Power=138, Acc=1.0, PP=10, Type=WATER), })
0 Move 1 0 Damage 45.0 Poke Pkm(Type=FAIRY, HP=228, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=FAIRY), PkmMove(Power=30, Acc=1.0, PP=10, Type=ELECTRIC), PkmMove(Power=174, Acc=1.0, PP=10, Type=GROUND), PkmMove(Power=138, Acc=1.0, PP=10, Type=WATER), })
0 Move 2 2 Damage 87.0 Poke Pkm(Type=FAIRY, HP=228, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=FAIRY), PkmMove(Power=30, Acc=1.0, PP=10, Type=ELECTRIC), PkmMove(Power=174, Acc=1.0, PP=10, Type=GROUND), PkmMove(Power=138, A

6

In [None]:
team0, team1 = PkmTeam(), PkmTeam()
agent0, agent1 = RandomPlayer(), RandomPlayer()
env = PkmBattleEnv((team0, team1),
                   encode=(agent0.requires_encode(), agent1.requires_encode()))  # set new environment with teams
n_battles = 3  # total number of battles
t = False
battle = 0
while battle < n_battles:
    s, _ = env.reset()
    while not t:  # True when all pkms of one of the two PkmTeam faint
        a = [agent0.get_action(s[0]), agent1.get_action(s[1])]
        s, _, t, _, _ = env.step(a)  # for inference, we don't need reward
        #env.render()
    t = False
    battle += 1
print(env.winner)  # winner id number

### Test generating roster per competition


In [2]:
# https://gitlab.com/DracoStriker/pokemon-vgc-engine/-/tree/master/vgc/util/generator?ref_type=heads

from vgc.competition.Competition import TreeChampionship
from vgc.competition.Competitor import CompetitorManager
from vgc.network.ProxyCompetitor import ProxyCompetitor
from vgc.util.generator.PkmRosterGenerators import RandomPkmRosterGenerator
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather

#### Notes
* roster is of size 100 with 100 pokemon on it
* not quite following the logic in competition but can generate a random team with one method

In [49]:
# get random team but not competition styple
roster = RandomPkmRosterGenerator().gen_roster()

temp_pkm = roster[-1].gen_pkm([0,1,2,3])
temp_pkm2 = roster[-2].gen_pkm([0,1,2,3])
temp_pkm3 = roster[-3].gen_pkm([0,1,2,3])
print(temp_pkm, temp_pkm2, temp_pkm3)
my_team = PkmTeam([temp_pkm, temp_pkm2, temp_pkm3])
print(my_team)

# can then make two random teams on the fly with the resoter each tiem
# can set roster size to only get certain number of pokemon
roster = RandomPkmRosterGenerator(roster_size=6).gen_roster()

print(len(roster))

Pkm(Type=WATER, HP=240, Moves={Bubble Beam, Ice Shard, Ember, String Shot, }) Pkm(Type=PSYCHIC, HP=210, Moves={Psybeam, Tackle, Dragon Rage, Close Combat, }) Pkm(Type=ELECTRIC, HP=180, Moves={Thunder Wave, Bullet Punch, Giga Drain, Leech Life, })
Active:
Pkm(Type=WATER, HP=240, Moves={Bubble Beam, Ice Shard, Ember, String Shot, })
Party:
Pkm(Type=PSYCHIC, HP=210, Moves={Psybeam, Tackle, Dragon Rage, Close Combat, })
Pkm(Type=ELECTRIC, HP=180, Moves={Thunder Wave, Bullet Punch, Giga Drain, Leech Life, })

6


In [51]:
# maybe alternative way
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

team_generator = RandomTeamGenerator(2)
team0 = team_generator.get_team().get_battle_team([0, 1, 2])
team1 = team_generator.get_team().get_battle_team([0, 1, 2])
print(team0)
print(team1)

# team0 = self.gen.get_team().get_battle_team([0, 1, 2])
# team1 = self.gen.get_team().get_battle_team([0, 1, 2])


Active:
Pkm(Type=GHOST, HP=120, Moves={PkmMove(Power=102, Acc=1.0, PP=10, Type=GHOST), PkmMove(Power=30, Acc=1.0, PP=10, Type=ICE), PkmMove(Power=66, Acc=1.0, PP=10, Type=ICE), PkmMove(Power=282, Acc=1.0, PP=10, Type=GROUND), })
Party:
Pkm(Type=FIRE, HP=264, Moves={PkmMove(Power=30, Acc=1.0, PP=10, Type=FIRE), PkmMove(Power=102, Acc=1.0, PP=10, Type=GRASS), PkmMove(Power=174, Acc=1.0, PP=10, Type=DARK), PkmMove(Power=30, Acc=1.0, PP=10, Type=BUG), })
Pkm(Type=POISON, HP=192, Moves={PkmMove(Power=138, Acc=1.0, PP=10, Type=POISON), PkmMove(Power=66, Acc=1.0, PP=10, Type=POISON), PkmMove(Power=102, Acc=1.0, PP=10, Type=STEEL), PkmMove(Power=102, Acc=1.0, PP=10, Type=GROUND), })

Active:
Pkm(Type=ELECTRIC, HP=156, Moves={PkmMove(Power=66, Acc=1.0, PP=10, Type=ELECTRIC), PkmMove(Power=174, Acc=1.0, PP=10, Type=FLYING), PkmMove(Power=66, Acc=1.0, PP=10, Type=GRASS), PkmMove(Power=138, Acc=1.0, PP=10, Type=ELECTRIC), })
Party:
Pkm(Type=ICE, HP=120, Moves={PkmMove(Power=318, Acc=1.0, PP=10, Ty

In [3]:
roster = RandomPkmRosterGenerator().gen_roster()
championship = TreeChampionship(roster, debug=True, gen=RandomTeamGenerator(2))


In [11]:
print(len(roster))

#dir(roster[0])
for i in range(10):
    print(roster[i])

100
PkmTemplate(Type=DRAGON, Max_HP=210, Moves={Outrage, Bullet Punch, Tackle, Wing Attack, })
PkmTemplate(Type=DRAGON, Max_HP=210, Moves={Outrage, Bullet Punch, Tackle, Wing Attack, })
PkmTemplate(Type=DARK, Max_HP=120, Moves={Crunch, Thunder Shock, Fire Blast, Ice Beam, })
PkmTemplate(Type=NORMAL, Max_HP=240, Moves={Slam, Sunny Day, Hail, Energy Ball, })
PkmTemplate(Type=FAIRY, Max_HP=150, Moves={Play Rough, Hydro Pump, Slam, Power Gem, })
PkmTemplate(Type=DARK, Max_HP=180, Moves={Crunch, Draco Meteor, Bullet Punch, Spore, })
PkmTemplate(Type=PSYCHIC, Max_HP=240, Moves={Psychic, Bug Buzz, Sweet Kiss, Power Gem, })
PkmTemplate(Type=GRASS, Max_HP=240, Moves={Energy Ball, Hail, Bubble Beam, Thunder Shock, })
PkmTemplate(Type=GRASS, Max_HP=240, Moves={Razor Leaf, Giga Drain, Bug Buzz, Nasty Plot, })
PkmTemplate(Type=GROUND, Max_HP=240, Moves={Mud Shot, Hydro Pump, Ice Shard, Dragon Rage, })
PkmTemplate(Type=GHOST, Max_HP=180, Moves={Shadow Sneak, Bubble Beam, Hydro Pump, Aqua Jet, })


In [12]:
rtg = RandomTeamGenerator(2)

In [46]:
temp_pkm = roster[-1].gen_pkm([0,1,2,3])
temp_pkm2 = roster[-2].gen_pkm([0,1,2,3])
temp_pkm3 = roster[-3].gen_pkm([0,1,2,3])
print(temp_pkm, temp_pkm2, temp_pkm3)
my_team = PkmTeam([temp_pkm, temp_pkm2, temp_pkm3])
print(my_team)

Pkm(Type=FLYING, HP=180, Moves={Hurricane, Ember, Bullet Punch, Dragon Rage, }) Pkm(Type=GRASS, HP=240, Moves={Energy Ball, Rain Dance, Recover, Spore, }) Pkm(Type=ELECTRIC, HP=210, Moves={Thunder, Nasty Plot, Power Gem, Dragon Rage, })
Active:
Pkm(Type=FLYING, HP=180, Moves={Hurricane, Ember, Bullet Punch, Dragon Rage, })
Party:
Pkm(Type=GRASS, HP=240, Moves={Energy Ball, Rain Dance, Recover, Spore, })
Pkm(Type=ELECTRIC, HP=210, Moves={Thunder, Nasty Plot, Power Gem, Dragon Rage, })



In [48]:
print(my_team)

Active:
Pkm(Type=FLYING, HP=180, Moves={Hurricane, Ember, Bullet Punch, Dragon Rage, })
Party:
Pkm(Type=GRASS, HP=240, Moves={Energy Ball, Rain Dance, Recover, Spore, })
Pkm(Type=ELECTRIC, HP=210, Moves={Thunder, Nasty Plot, Power Gem, Dragon Rage, })



In [None]:
# generate pkm from template
# templ = PkmTemplate()
# pkm = templ.gen_pkm([1, 2, 5, 3])  # 4 max!


In [13]:
rtg

<vgc.util.generator.PkmTeamGenerators.RandomTeamGenerator at 0x235d44738b0>

In [16]:
# team0 = self.gen.get_team().get_battle_team([0, 1, 2])
team0 = rtg.get_team().get_battle_team([0, 1, 2])

In [37]:
team0.active.pkm_id

-1

In [39]:
print(team0.party)

[<vgc.datatypes.Objects.Pkm object at 0x00000235D4404070>, <vgc.datatypes.Objects.Pkm object at 0x00000235D44042B0>]


In [38]:
dir(team0)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'active',
 'confused',
 'entry_hazard',
 'fainted',
 'get_not_fainted',
 'get_pkm_list',
 'n_turns_confused',
 'party',
 'reset',
 'reset_team_members',
 'size',
 'stage',
 'switch']

In [34]:
dir(team0.active)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'asleep',
 'fainted',
 'frozen',
 'hide',
 'hide_pkm',
 'hp',
 'max_hp',
 'moves',
 'n_turns_asleep',
 'paralyzed',
 'pkm_id',
 'public',
 'reset',
 'reveal_pkm',
 'revealed',
 'status',
 'type']

In [18]:
dir(team0)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'active',
 'confused',
 'entry_hazard',
 'fainted',
 'get_not_fainted',
 'get_pkm_list',
 'n_turns_confused',
 'party',
 'reset',
 'reset_team_members',
 'size',
 'stage',
 'switch']

In [23]:
team0

<vgc.datatypes.Objects.PkmTeam at 0x235d44738e0>

In [25]:
test_team = PkmTeam()
test_team

<vgc.datatypes.Objects.PkmTeam at 0x235d445da00>

In [26]:
test_team_2 = PkmTeam(pkms=[roster[i] for i in [0, 1, 2]])

In [29]:
test_team_2.active

<vgc.datatypes.Objects.PkmTemplate at 0x2359f53b7c0>

In [30]:
test_team.active

<vgc.datatypes.Objects.Pkm at 0x235d445d880>

In [31]:
team0.active

<vgc.datatypes.Objects.Pkm at 0x235d44734c0>

### timing recreating the env

In [52]:

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.behaviour.BattlePolicies import RandomPlayer, TerminalPlayer
import time

team_generator = RandomTeamGenerator(2)

In [61]:
num_iter = 100
time_start = time.time()
for i in range(num_iter):
    

    team0 = team_generator.get_team().get_battle_team([0, 1, 2])
    team1 = team_generator.get_team().get_battle_team([0, 1, 2])

    PkmBattleEnv((team0, team1),
                    # encode Fasle for forward env
                    #encode=(agent0.requires_encode(), agent1.requires_encode()))  # set new environment with teams
                    encode=(True, True))
    # print(i)

print("Time taken for {} iterations: {}".format(num_iter, time.time() - time_start))

Time taken for 100 iterations: 0.02202892303466797
