In [25]:
import numpy as np
import time
import copy

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

from vgc.datatypes.Constants import TYPE_CHART_MULTIPLIER, MAX_HIT_POINTS, MOVE_MAX_PP, DEFAULT_TEAM_SIZE
from vgc.datatypes.Objects import PkmMove, Pkm, PkmTeam, GameState, Weather
from vgc.datatypes.Types import PkmStat, PkmType, WeatherCondition, \
    N_TYPES, N_STATUS, N_STATS, N_ENTRY_HAZARD, N_WEATHER, PkmStatus, PkmEntryHazard

import math
import pprint
import pickle

### Make a dmg to and from dict

In [35]:

def get_turns_to_faint_list(team_1_game_state, team_2_game_state, max_turns_to_faint_value):
    '''
    put in zero for the fainted
        handle the check for fainted in HP
    assume everything is revealed
        handle masking elsewhere
    '''

    # Get weather condition
    weather = team_1_game_state.weather.condition

    # Get my Pokémon team
    team_1 = team_1_game_state.teams[0]
    team_1_pkm_list = [team_1.active] + team_1.party

    # Get opponent's team
    team_2 = team_2_game_state.teams[0]
    team_2_pkm_list = [team_2.active] + team_2.party

    # Iterate over all my Pokémon and their moves to find the most damaging move
    best_damage_list = []
    turns_to_faint_list = []
    hp_list = []

    for team_1_pkm_index, team_1_pkm in enumerate(team_1_pkm_list):
        # Initialize variables for the best move and its damage
        best_damage = -np.inf

        for team_2_pkm_index, team_2_pkm in enumerate(team_2_pkm_list):
            if team_1_pkm_index == 0:
                team_1_attack_stage = team_1.stage[PkmStat.ATTACK]
            else:
                team_1_attack_stage = 0
            
            if team_2_pkm_index == 0:
                team_2_defense_stage = team_2.stage[PkmStat.DEFENSE]
            else:
                team_2_defense_stage = 0

            for move_index, move in enumerate(team_1_pkm.moves):
                
                damage = estimate_damage(move.type, team_1_pkm.type, move.power, team_2_pkm.type, team_1_attack_stage,
                                            team_2_defense_stage, weather)

                # Check if the current move has higher damage than the previous best move
                if damage > best_damage:
                    best_damage = damage

            # get best dmg for each pokemon
            best_damage_list.append(best_damage)
            hp_list.append(team_2_pkm.hp)

            if best_damage > 0.:
                turns_to_faint = math.ceil(team_2_pkm.hp / best_damage)
            else:
                turns_to_faint = max_turns_to_faint_value

            turns_to_faint_list.append(turns_to_faint)

    print(turns_to_faint_list)
    print(best_damage_list)
    print(hp_list)

    return turns_to_faint_list


def estimate_damage(move_type: PkmType, pkm_type: PkmType, move_power: float, opp_pkm_type: PkmType,
                    attack_stage: int, defense_stage: int, weather: WeatherCondition) -> float:
        '''
        from updated repo
        '''
        stab = 1.5 if move_type == pkm_type else 1.
        if (move_type == PkmType.WATER and weather == WeatherCondition.RAIN) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.SUNNY):
            weather = 1.5
        elif (move_type == PkmType.WATER and weather == WeatherCondition.SUNNY) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.RAIN):
            weather = .5
        else:
            weather = 1.
        stage_level = attack_stage - defense_stage
        stage = (stage_level + 2.) / 2 if stage_level >= 0. else 2. / (np.abs(stage_level) + 2.)
        damage = TYPE_CHART_MULTIPLIER[move_type][opp_pkm_type] * stab * weather * stage * move_power

        #print(damage, move_type, pkm_type, move_power, opp_pkm_type, attack_stage, defense_stage, weather)
        return damage


def save_object_as_pkl(object_to_save, save_tag):
    '''
    Save object a pickle file
    '''
    with open(f'{save_tag}.pickle', 'wb') as handle:
        pickle.dump(object_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [36]:
test_iters = 5
max_state_value = 1000000
time_start = time.time()

raw_stats_dict = {}
turns_to_faint_dict = {}

team_generator = RandomTeamGenerator(2)

for i in range(test_iters):
    agent_team = team_generator.get_team().get_battle_team([0, 1, ])
    opp_team = team_generator.get_team().get_battle_team([0, 1, ])

    env = PkmBattleEnv((agent_team, opp_team), encode=(False, False)) 

    game_state, info = env.reset()

    team_1_faint_list = get_turns_to_faint_list(game_state[0], game_state[1], max_state_value)
    team_2_faint_list = get_turns_to_faint_list(game_state[1], game_state[0], max_state_value)

    state_tuple = tuple(team_1_faint_list + team_2_faint_list)

    if state_tuple in raw_stats_dict:
        raw_stats_dict[state_tuple] += 1
    else:
        raw_stats_dict[state_tuple] = 1

    for turns in team_1_faint_list + team_2_faint_list:
        if turns in turns_to_faint_dict:
            turns_to_faint_dict[turns] += 1
        else:
            turns_to_faint_dict[turns] = 1

time_end = time.time()
print(f"Time to run {time_end - time_start:.3f} seconds")

print(len(raw_stats_dict))
print(len(turns_to_faint_dict))

save_object_as_pkl(raw_stats_dict, f'turns_to_faint_state_dict_{int(time_start)}')
save_object_as_pkl(turns_to_faint_dict, f'turns_to_faint_count_dict_{int(time_start)}')

[1, 1, 1, 1]
[153.0, 210.0, 130.5, 348.0]
[120.0, 156.0, 120.0, 156.0]
[1, 1, 1, 1]
[138.0, 348.0, 207.0, 207.0]
[120.0, 156.0, 120.0, 156.0]
[1, 1, 2, 1]
[369.0, 369.0, 207.0, 207.0]
[264.0, 192.0, 264.0, 192.0]
[1, 1, 1, 1]
[306.0, 306.0, 276.0, 276.0]
[156.0, 264.0, 156.0, 264.0]
[1, 1, 1, 1]
[276.0, 276.0, 198.0, 198.0]
[156.0, 192.0, 156.0, 192.0]
[2, 2, 2, 1]
[246.0, 246.0, 369.0, 369.0]
[372.0, 336.0, 372.0, 336.0]
[1, 1, 2, 1]
[261.0, 261.0, 138.0, 138.0]
[192.0, 120.0, 192.0, 120.0]
[1, 1, 1, 1]
[315.0, 315.0, 210.0, 306.0]
[156.0, 192.0, 156.0, 192.0]
[2, 2, 1, 2]
[102.0, 348.0, 276.0, 276.0]
[192.0, 408.0, 192.0, 408.0]
[2, 2, 1, 1]
[102.0, 174.0, 204.0, 204.0]
[192.0, 192.0, 192.0, 192.0]
Time to run 0.009 seconds
4
2


In [34]:
print(turns_to_faint_dict)

{1: 33, 2: 6, 3: 1}


In [None]:
print(raw_stats_dict)


#### Scrap

In [None]:


# def get_turns_to_faint_array(hp_list, best_dmg_list, max_state_value):
#     '''
#     hp list is indexed 0 to 3 with
#     0 agent active
#     1 agent party best
#     2 opp active
#     3 opp party best

#     best_dmg_list is indexed 0 to 7
#     0 agent active dmg to opp active
#     1 agent active dmg to opp party best
#     2 agent party best dmg to opp active
#     3 agent party best dmg to opp party best
#     4 opp active dmg to agent active
#     5 opp active dmg to agent party best
#     6 opp party best dmg to agent active
#     7 opp party best dmg to agent party best

#     '''
#     turns_to_faint_array = np.ones((8, ), dtype=np.float32) * -1.

#     agent_active_hp_index = 0
#     agent_party_best_hp_index = 1
#     opp_active_hp_index = 2
#     opp_party_best_hp_index = 3

#     agent_active_dmg_to_opp_active_index = 0
#     agent_active_dmg_to_opp_party_best_index = 1
#     agent_party_best_dmg_to_opp_active_index = 2
#     agent_party_best_dmg_to_opp_party_best_index = 3
#     opp_active_dmg_to_agent_active_index = 4
#     opp_active_dmg_to_agent_party_best_index = 5
#     opp_party_best_dmg_to_agent_active_index = 6
#     opp_party_best_dmg_to_agent_party_best_index = 7

#     turns_to_faint_array[0] = get_dmg_turns_to_faint(hp_list[agent_active_hp_index], 
#         best_dmg_list[agent_active_dmg_to_opp_active_index], max_state_value)
#     turns_to_faint_array[1] = get_dmg_turns_to_faint(hp_list[agent_active_hp_index],
#         best_dmg_list[agent_active_dmg_to_opp_party_best_index], max_state_value)

#     turns_to_faint_array[2] = get_dmg_turns_to_faint(hp_list[agent_party_best_hp_index],
#         best_dmg_list[agent_party_best_dmg_to_opp_active_index], max_state_value)
#     turns_to_faint_array[3] = get_dmg_turns_to_faint(hp_list[agent_party_best_hp_index],
#         best_dmg_list[agent_party_best_dmg_to_opp_party_best_index], max_state_value)

#     turns_to_faint_array[4] = get_dmg_turns_to_faint(hp_list[opp_active_hp_index],
#         best_dmg_list[opp_active_dmg_to_agent_active_index], max_state_value)
#     turns_to_faint_array[5] = get_dmg_turns_to_faint(hp_list[opp_active_hp_index],
#         best_dmg_list[opp_active_dmg_to_agent_party_best_index], max_state_value)

#     turns_to_faint_array[6] = get_dmg_turns_to_faint(hp_list[opp_party_best_hp_index],
#         best_dmg_list[opp_party_best_dmg_to_agent_active_index], max_state_value)
#     turns_to_faint_array[7] = get_dmg_turns_to_faint(hp_list[opp_party_best_hp_index],
#         best_dmg_list[opp_party_best_dmg_to_agent_party_best_index], max_state_value)

    
#     turns_to_faint_array = np.ceil(turns_to_faint_array)
#     turns_to_faint_array = turns_to_faint_array.astype(np.int32)
#     turns_to_faint_array = turns_to_faint_array.clip(-1, max_state_value)

#     return turns_to_faint_array

# turns_to_faint_array = np.ones((8, ), dtype=np.int32) * -1
# turns_to_faint_array
# np.ones((8, ), dtype=np.float32) 
# a = np.array([1.1, 2.0, 3, 4.00001, 5.00000, 5.99999, 7, 8])  
# b = np.ceil(a)
# b = b.astype(int)
# c = tuple(b)
# c
# import math
# type(math.ceil(2/1.5))

### Testing MC Env Learning in 2 v 2 environment

In [None]:
'''
working
    finish up the loop function
        smoke test it
        see if saved action dict does anything eval wise. does it even trigger?
            are results better
    run tests on dict action space size
    probably want to expand functions to at least what opp dmg is to party
    check the functions work

to do important:
    can't swap if swp is identical or will be stuck in a swap loop
    maybe add memory saying if swapped at 2 v 2 (or other levels as well)

to do improvements:
    know the opp dmg to current team. can add those states
        DONE YES confirm this. might be a weird trick with revealed states
    maybe find how many states are possible with raw values
'''

#### Constants and loading

In [6]:
import numpy as np
import time
import copy

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

from vgc.datatypes.Constants import TYPE_CHART_MULTIPLIER, MAX_HIT_POINTS, MOVE_MAX_PP, DEFAULT_TEAM_SIZE
from vgc.datatypes.Objects import PkmMove, Pkm, PkmTeam, GameState, Weather
from vgc.datatypes.Types import PkmStat, PkmType, WeatherCondition, \
    N_TYPES, N_STATUS, N_STATS, N_ENTRY_HAZARD, N_WEATHER, PkmStatus, PkmEntryHazard


import pprint
import pickle

In [7]:
'''
grab the value from the lookup dict and see if can be used

DRY for the two loops

review flow of all code

reveiw and test all parts of the code

can this be generalized more?
    ie not just for the first move when eval but any move?
    idk, probably not for now

due to the variability in outcomes I think I need a ton of results to tell if swap is better or not
    like 1000 for each state and since like 500,000 states that is 500,000,000 battles

TEST need to turn the states into the dict
    TEST want to store counts and running mean of the reward

TEST set up the evaluation loop for this agent vs. the base always attack agent

TEST need to test how many battles can get through in how much time

TEST saving the action dict

TEST need to select the initial action
TEST need to then to attack later
TEST need to convert the attack action into the best attack
TEST need to convert the swap action into a swap
TEST need to store the result of the battle
TEST convert outcome into a rewards

Later

functionalize the build dict loop
functionalize the eval
can run the dict and eval in python files
can parallelize the dict building

maybe store all states in a list then to the dict
    works I think as long as all actions past that point are attacks
    could then combine that later with a swap at that point maybe?
        idk maybe not... could be the 2nd swap and then things are necessarily clear
            ie initial pkm has been revealed and may have taken dmg (though maybe that doesn't matter)

possibly store the state dict attack action for non first actions as well

            
need to add the hiding part
    i guess it's more like some states don't know opp dmg to current pkm and sometimes do

    maybe parallelize if any of this works


can I get the best dmg from both teams even if the pkm stuff is hidden and not revealed?
    probably yes since passing in the team specific state

can check to see how accurate the attack function is
'''

"\n\nneed to turn the states into the dict\n    TEST want to store counts and running mean of the reward\n\nset up the evaluation loop for this agent vs. the base always attack agent\n\nTEST need to test how many battles can get through in how much time\n\nTEST saving the action dict\n\nTEST need to select the initial action\nTEST need to then to attack later\nTEST need to convert the attack action into the best attack\nTEST need to convert the swap action into a swap\nTEST need to store the result of the battle\nTEST convert outcome into a rewards\n\nLater\n\nfunctionalize the build dict loop\nfunctionalize the eval\ncan run the dict and eval in python files\ncan parallelize the dict building\n\nmaybe store all states in a list then to the dict\n    works I think as long as all actions past that point are attacks\n    could then combine that later with a swap at that point maybe?\n        idk maybe not... could be the 2nd swap and then things are necessarily clear\n            ie init

#### Functions for Loops

In [172]:
def turn_agent_action_into_env_action(action, agent_game_state):
        '''
        Action values are
        0: select best move
        1: switch to first pkm
        2: switch to second pkm

        Env actions are
        0 to 3: action of active pokm
        4: switch to first pkm
        5: switch to second pkm
        '''
        # always get best move and action dmg list
        best_active_action, best_damage_list = get_best_active_damage_action(agent_game_state)

        if action == 0:
            # get best dmg action
            action = best_active_action
        else:
            # switch to first or second pkm if alive
            if action == 1 or action == 2:
                pkm = agent_game_state.teams[0].party[action-1]
                if pkm.fainted() or pkm.hp <= 0.0:
                    action = best_active_action
                else:
                    action = action + 3
            else:
                action = best_active_action

        return action, best_damage_list


def get_best_active_damage_action(g: GameState):
    '''
    '''
    # Get weather condition
    weather = g.weather.condition

    # Get my Pokémon team
    my_team = g.teams[0]
    my_pkms = [my_team.active] + my_team.party

    # Get opponent's team
    opp_team = g.teams[1]
    opp_active = opp_team.active

    opp_active_type = opp_active.type
    opp_defense_stage = opp_team.stage[PkmStat.DEFENSE]

    # Iterate over all my Pokémon and their moves to find the most damaging move
    best_dmg_list = []
    best_move_list = []

    for i, pkm in enumerate(my_pkms):
        # Initialize variables for the best move and its damage
        best_damage = -np.inf
        best_move_id = -1

        if i == 0:
            my_attack_stage = my_team.stage[PkmStat.ATTACK]
        else:
            my_attack_stage = 0

        for j, move in enumerate(pkm.moves):
            
            damage = estimate_damage(move.type, pkm.type, move.power, opp_active_type, my_attack_stage,
                                        opp_defense_stage, weather)
            
            # Check if the current move has higher damage than the previous best move
            if damage > best_damage:
                best_move_id = j + i * 4 # think for 2024 j is 0 to 3 for each
                best_damage = damage

        # get best move and dmg for each pokemon
        best_dmg_list.append(best_damage)
        best_move_list.append(best_move_id)

    active_pkm_best_move_id = best_move_list[0]

    if active_pkm_best_move_id < 0 or active_pkm_best_move_id > 3:
        print(f"Error: best move id { active_pkm_best_move_id } not in expected range")
        active_pkm_best_move_id = 0

    return active_pkm_best_move_id, best_dmg_list


def estimate_damage(move_type: PkmType, pkm_type: PkmType, move_power: float, opp_pkm_type: PkmType,
                    attack_stage: int, defense_stage: int, weather: WeatherCondition) -> float:
        '''
        Not from original code. from updated repo
        '''
        stab = 1.5 if move_type == pkm_type else 1.
        if (move_type == PkmType.WATER and weather == WeatherCondition.RAIN) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.SUNNY):
            weather = 1.5
        elif (move_type == PkmType.WATER and weather == WeatherCondition.SUNNY) or (
                move_type == PkmType.FIRE and weather == WeatherCondition.RAIN):
            weather = .5
        else:
            weather = 1.
        stage_level = attack_stage - defense_stage
        stage = (stage_level + 2.) / 2 if stage_level >= 0. else 2. / (np.abs(stage_level) + 2.)
        damage = TYPE_CHART_MULTIPLIER[move_type][opp_pkm_type] * stab * weather * stage * move_power

        #print(damage, move_type, pkm_type, move_power, opp_pkm_type, attack_stage, defense_stage, weather)
        return damage


def save_object_as_pkl(object_to_save, save_tag):
    '''
    Save object a pickle file
    '''
    with open(f'{save_tag}.pickle', 'wb') as handle:
        pickle.dump(object_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)

# make a dict that has keys for 0 to 100 and values for the action dict
def make_lookup_dict():
    lookup_dict = {}
    for i in range(100):
        if i <= 40:
            lookup_value = i // 5
        else:
            lookup_value = 4 + i // 10
        lookup_dict[i] = lookup_value
    return lookup_dict

# lookup_dict = make_lookup_dict()
# pprint.pprint(lookup_dict)

def get_win_loss_reward(terminated, winner, player_index):
    '''
    Does a reward for winning or losing
    winner is -1 unless a winner has been picked
    '''
    reward = 0.
    if terminated:

        if winner == 0 or winner == 1:
            if winner == player_index:
                reward = 1.
            else:
                reward = -1.
        #print(f"reward {reward} | terminated {terminated} | winner {self.env.winner} | player_index {player_index}|")
    return reward

def get_running_mean(old_mean, old_count, new_value):
    '''
    '''
    new_mean = (old_mean * old_count + new_value) / (old_count + 1)
    
    return new_mean


def add_results_to_action_dict(action_dict, state_key, agent_first_move, win_int):
    '''
    '''
    count_key = "count"
    sum_wins_key = "sum_wins"

    if state_key in action_dict:
        if agent_first_move in action_dict[state_key]:
            action_dict[state_key][agent_first_move][sum_wins_key] += win_int
            action_dict[state_key][agent_first_move][count_key] += 1
        else:
            action_dict[state_key][agent_first_move] = {}
            action_dict[state_key][agent_first_move][sum_wins_key] = win_int
            action_dict[state_key][agent_first_move][count_key] = 1
    else:
        action_dict[state_key] = {}
        action_dict[state_key][agent_first_move] = {}
        action_dict[state_key][agent_first_move][sum_wins_key] = win_int
        action_dict[state_key][agent_first_move][count_key] = 1

def add_action_to_pkm_env_action_dict(env_action, my_dict, team_key):
    if env_action in my_dict[team_key]:
        my_dict[team_key][env_action] += 1
    else:
        my_dict[team_key][env_action] = 1

    return my_dict



# def add_results_to_action_dict(action_dict, state_key, agent_first_move, agent_reward):
#     '''
#     '''
#     if state_key in action_dict:
#         if agent_first_move in action_dict[state_key]:
#             action_dict[state_key][agent_first_move]["avg_reward"] = get_running_mean(action_dict[state_key][agent_first_move]["avg_reward"],
#                                                                                   action_dict[state_key][agent_first_move]["count"], agent_reward)
#             action_dict[state_key][agent_first_move]["count"] += 1
#         else:
#             action_dict[state_key][agent_first_move] = {}
#             action_dict[state_key][agent_first_move]["avg_reward"] = agent_reward
#             action_dict[state_key][agent_first_move]["count"] = 1

# a = (1, 2, 3)
# type(a)
# # combine two tuples
# b = a + (4, 5, 6)
# b
# # append the value 7 to the tuple
# b = b + (7,)
# b


def get_hp_array(game_state_agent, game_state_opp):
    '''
    '''
    agent_pkm_hp_list = [game_state_agent.teams[0].active.hp]

    for pkm in game_state_agent.teams[0].party:
        agent_pkm_hp_list.append(pkm.hp)

    opp_active_pkm_hp = game_state_opp.teams[0].active.hp

    hp_array = np.array(agent_pkm_hp_list + [opp_active_pkm_hp])

    return hp_array

def get_hp_list(game_state_agent, game_state_opp):
    '''
    '''
    agent_pkm_hp_list = [game_state_agent.teams[0].active.hp]

    for pkm in game_state_agent.teams[0].party:
        agent_pkm_hp_list.append(pkm.hp)

    opp_active_pkm_hp = game_state_opp.teams[0].active.hp

    hp_list = agent_pkm_hp_list + [opp_active_pkm_hp]

    return hp_list

def turn_game_state_into_dict_key(game_state_agent, game_state_opp,lookup_dict,
    dmg_array,                      
    pkm_hp_max = 480., dmg_scale_value = 600.):
    '''
    tuple is (
        # HP
        agent_active_pkm_hp, agent_party+_pkm_hp, opp_active_pkm_hp,
        # DMG to opp
        agent_active_pkm_dmg, agent_party_pkm_dmg,
        # dmg from opp
        # do this later
        )
    
    If everything is on the scale of 0 to 100 picturing
    8 buckets from 0 to 40 with increments of 5
    6 buckets from 40 to 100 with increments of 10

    preload a dict with the look up, then scal everything here

    scaling dmg more than max hp so if move can do over 480 dmg has some sort of knowledge ofit
    '''
    # get arrays to make tuples out of for dict key
    hp_array = get_hp_array(game_state_agent, game_state_opp)

    hp_tuple = scale_hp_and_get_dict_value(hp_array, pkm_hp_max, lookup_dict)
    dmg_tuple = scale_hp_and_get_dict_value(dmg_array, dmg_scale_value, lookup_dict)

    dict_key = hp_tuple + dmg_tuple

    return dict_key


def scale_hp_and_get_dict_value(hp_array, max_hp, lookup_dict):
    '''
    '''

    # scale by max hp then multiply by 100 to get into 0 to 99 range
    hp_array = (hp_array / max_hp) * 100
    # round, convert to int then clip to 0 to 99
    hp_array = hp_array.round(0).astype(int).clip(0, 99)

    

    # for hp in hp_array:
    #     hp_values_from_dict_tuple = hp_values_from_dict_tuple + (lookup_dict[hp],)

    hp_values_from_dict_tuple = tuple(lookup_dict[hp] for hp in hp_array)

    return hp_values_from_dict_tuple


    

In [136]:
# lookup_dict = make_lookup_dict()

# a = turn_game_state_into_dict_key(game_state[0], game_state[1], lookup_dict, dmg_array)

# print(a)

# a = (1, 2, 3)
# type(a)
# # combine two tuples
# b = a + (4, 5, 6)
# b
# print(b)
# print(type(b))

In [140]:
# team_generator = RandomTeamGenerator(2)

# agent_team = team_generator.get_team().get_battle_team([0, 1, ])
# opp_team = team_generator.get_team().get_battle_team([0, 1, ])

# # set new environment with teams
# env = PkmBattleEnv((agent_team, opp_team),
#                 encode=(False, False)) 

# game_state, info = env.reset()

# # for pkm in game_state[0].teams[1].party:
# #     print(pkm.hp, pkm.fainted())
# #     # if pkm.hp > 0.0 or not pkm.fainted():
# #     #     is_more_than_opp_pkm_alive = True
# #     #     break



(<vgc.engine.PkmBattleEnv.PkmBattleEnv at 0x17e4a228ac0>,
 <vgc.engine.PkmBattleEnv.PkmBattleEnv at 0x17e4a2c5070>)

True

##### train eval loop function

In [None]:
def get_best_statistical_action(action_dict, state_key):
    '''
    (8, 8, 6, 5, 4) {'attack': {'sum_wins': 89, 'count': 194}, 'swap': {'sum_wins': 52, 'count': 167}}
    '''
    best_action = 0
    attack_key = 'attack'
    swap_key = 'swap'
    win_key = 'sum_wins'
    count_key = 'count'

    STOPPED HERE
    # check if state_key in action_dict
    # get the count and the wins
    # plut them into the chi squared test

    # if state_key in action_dict:
    #     best_action = max(action_dict[state_key], key=action_dict[state_key].get)
    # else:
    #     best_action = 0

    return best_action

In [161]:
def build_train_eval_loop(num_battles, is_eval, run_tag, is_save=True, action_dict_to_copy=None):
    '''
    STOPPED HERE NEED TO WORK IN THE EVAL CODE
    then review logic then test
    '''
    if is_eval and action_dict_to_copy is not None:
        action_lookup_dict = copy.deepcopy(action_dict_to_copy)

    winner_dict = {
        0:0,
        1:0,
        -1:0
    }

    pkm_env_action_dict = {
        0:{},
        1:{},
    }

    action_state_results_dict = {}

    max_episode_steps = 250
    agent_index = 0

    lookup_dict = make_lookup_dict()
    team_generator = RandomTeamGenerator(2)

    time_int = int(time.time())
    save_tag =  f"_{run_tag}_{time_int}"
    start_time = time.time()


    for battle_idx in range(num_battles):
        
        agent_team = team_generator.get_team().get_battle_team([0, 1, ])
        opp_team = team_generator.get_team().get_battle_team([0, 1, ])

        # set new environment with teams
        env = PkmBattleEnv((agent_team, opp_team), encode=(False, False)) 

        game_state, info = env.reset()

        is_first_move = True
        agent_first_move = None
        state_key = None

        for episode_step in range(max_episode_steps):
            if is_first_move:
                if np.random.rand() < 0.5:
                    agent_pre_env_action = 0
                    agent_first_move = 'attack'
                else:
                    agent_pre_env_action = 1
                    agent_first_move = 'swap'
                is_first_move = False
            else:
                agent_pre_env_action = 0

            agent_env_action, agent_team_best_damage_list = turn_agent_action_into_env_action(agent_pre_env_action, game_state[0])
            opp_action, opp_best_damage_list = get_best_active_damage_action(game_state[1])

            if agent_pre_env_action == 1 and agent_env_action != 4:
                print("Error agent action is 1 but env action is not 4 ")
            elif agent_pre_env_action == 0:
                if (agent_env_action < 0 or agent_env_action > 3):
                    print("Error agent action is 0 but env action is not 0 to 3 ")
                elif len(agent_team_best_damage_list) == 0:
                    print("Error agent action is 0 but best damage list is empty")
                elif agent_team_best_damage_list[0] < 0:
                    print("Error agent action is 0 but best damage is negative")

            if opp_action < 0 or opp_action > 3:
                print("Error opp action is not 0 to 3")
            elif len(opp_best_damage_list) == 0:
                print("Error opp best damage list is empty")
            elif opp_best_damage_list[0] < 0:
                print("Error opp best damage is negative")
            
            # get the state key
            # only do this on the initial set up of the env
            if state_key is None:
                # for now just doing part dmg to opp
                #dmg_array = np.array(agent_best_damage_list + [opp_best_damage,])
                dmg_array = np.array(agent_team_best_damage_list)
                state_key = turn_game_state_into_dict_key(game_state[0], game_state[1], lookup_dict, dmg_array)
                if len(state_key) != 5:
                    print("Error state key is not 5 long")

                if is_eval:
                    # see if action look up dict says to do a different action
                    if state_key in action_lookup_dict:
                        # do logic for get if action is better than swap
                        STOPPED HERE
                        agent_pre_env_action = ...
                        agent_env_action = turn_agent_action_into_env_action(agent_pre_env_action, game_state[0])

                    pkm_env_action_dict = add_action_to_pkm_env_action_dict(agent_env_action, pkm_env_action_dict, 0)
                    pkm_env_action_dict = add_action_to_pkm_env_action_dict(opp_action, pkm_env_action_dict, 1)

            # enter action and step the env
            action_list = [agent_env_action, opp_action]
            game_state, _not_used_reward, terminated, truncated, info = env.step(action_list)  # for inference, we don't need reward

            if episode_step == max_episode_steps - 1:
                print('Warning: max steps reached')
                terminated = True

            if terminated:
                winner = env.winner
                if winner == agent_index:
                    win_int = 1
                else:
                    win_int = 0

                add_results_to_action_dict(action_state_results_dict, state_key, agent_first_move, win_int)

                if winner in winner_dict:
                    winner_dict[winner] += 1
                break

    end_time = time.time()
    print(f"Time to run {(end_time - start_time) / 60:.3f} minutes")
    print(f"Time to run {(end_time - start_time) / num_battles:.3f} seconds per battle")
    print(f"Time to run {((end_time - start_time) / num_battles / 60 / 60) * 1000000:.3f} hours per million battles")

    print(winner_dict)

    if is_save:
        save_object_as_pkl(action_state_results_dict , f'action_dict_{save_tag}')
        save_object_as_pkl(action_state_results_dict , f'action_dict_{winner_dict}')

    return winner_dict, action_state_results_dict, pkm_env_action_dict
    

In [162]:
test_wd, test_ad, test_pead = build_train_eval_loop(10, is_eval=False, run_tag="test", is_save=False )
print(test_wd)
print(len(test_ad))

Time to run 0.001 minutes
Time to run 0.005 seconds per battle
Time to run 1.500 hours per million battles
{0: 5, 1: 5, -1: 0}
{0: 5, 1: 5, -1: 0}
10


#### Build Dict Loop

In [137]:
num_battles = 1000000

winner_dict = {
    0:0,
    1:0,
    -1:0
}

pkm_env_action_dict = {
    0:{},
    1:{},
}

action_state_results_dict = {}

max_episode_steps = 250
agent_index = 0

lookup_dict = make_lookup_dict()
team_generator = RandomTeamGenerator(2)

time_int = int(time.time())
save_tag =  f"_smoke_test_{time_int}"
start_time = time.time()


for battle_idx in range(num_battles):
    
    agent_team = team_generator.get_team().get_battle_team([0, 1, ])
    opp_team = team_generator.get_team().get_battle_team([0, 1, ])

    # set new environment with teams
    env = PkmBattleEnv((agent_team, opp_team),
                   encode=(False, False)) 

    game_state, info = env.reset()

    is_first_move = True
    agent_first_move = None
    state_key = None

    for episode_step in range(max_episode_steps):
        if is_first_move:
            if np.random.rand() < 0.5:
                agent_pre_env_action = 0
                agent_first_move = 'attack'
            else:
                agent_pre_env_action = 1
                agent_first_move = 'swap'
            is_first_move = False
        else:
            agent_pre_env_action = 0

        agent_env_action, agent_team_best_damage_list = turn_agent_action_into_env_action(agent_pre_env_action, game_state[0])
        opp_action, opp_best_damage_list = get_best_active_damage_action(game_state[1])

        if agent_pre_env_action == 1 and agent_env_action != 4:
            print("Error agent action is 1 but env action is not 4 ")
        elif agent_pre_env_action == 0:
            if (agent_env_action < 0 or agent_env_action > 3):
                print("Error agent action is 0 but env action is not 0 to 3 ")
            elif len(agent_team_best_damage_list) == 0:
                print("Error agent action is 0 but best damage list is empty")
            elif agent_team_best_damage_list[0] < 0:
                print("Error agent action is 0 but best damage is negative")

        if opp_action < 0 or opp_action > 3:
            print("Error opp action is not 0 to 3")
        elif len(opp_best_damage_list) == 0:
            print("Error opp best damage list is empty")
        elif opp_best_damage_list[0] < 0:
            print("Error opp best damage is negative")
        
        # get the state key
        # only do this on the initial set up of the env
        if state_key is None:
            # for now just doing part dmg to opp
            #dmg_array = np.array(agent_best_damage_list + [opp_best_damage,])
            dmg_array = np.array(agent_team_best_damage_list)
            state_key = turn_game_state_into_dict_key(game_state[0], game_state[1], lookup_dict, dmg_array)
            if len(state_key) != 5:
                print("Error state key is not 5 long")

        # enter action and step the env
        action_list = [agent_env_action, opp_action]
        game_state, _not_used_reward, terminated, truncated, info = env.step(action_list)  # for inference, we don't need reward

        if episode_step == max_episode_steps - 1:
            print('Warning: max steps reached')
            terminated = True

        if terminated:
            winner = env.winner
            if winner == agent_index:
                win_int = 1
            else:
                win_int = 0

            add_results_to_action_dict(action_state_results_dict, state_key, agent_first_move, win_int)
            break

end_time = time.time()
print(f"Time to run {(end_time - start_time) / 60:.3f} minutes")
print(f"Time to run {(end_time - start_time) / num_battles:.3f} seconds per battle")
print(f"Time to run {((end_time - start_time) / num_battles / 60 / 60) * 1000000:.3f} hours per million battles")

print(winner_dict)
# print(action_dict)


save_object_as_pkl(action_state_results_dict , 'action_dict_smoke_test')


Time to run 73.454 minutes
Time to run 0.004 seconds per battle
Time to run 1.224 hours per million battles
{0: 0, 1: 0, -1: 0}


array([], dtype=float64)

In [138]:
len(action_state_results_dict)

37543

##### checking action dict results


In [147]:
# for k, v in action_state_results_dict.items():
#     print(k, v)
#     break

# tuple is (
#     # HP
#     agent_active_pkm_hp, agent_party+_pkm_hp, opp_active_pkm_hp,
#     # DMG to opp
#     agent_active_pkm_dmg, agent_party_pkm_dmg,
#     # dmg from opp
#     # do this later
# )
    

(8, 8, 6, 5, 4) {'attack': {'sum_wins': 89, 'count': 194}, 'swap': {'sum_wins': 52, 'count': 167}}


In [156]:
len(count_list)

37543

In [157]:
as_copy = copy.deepcopy(action_state_results_dict)

count_list = []

for k, v in as_copy.items():
    attack_count = v.get('attack', {}).get('count', 0)
    swap_count = v.get('swap', {}).get('count', 0)
    attack_wins = v.get('attack', {}).get('sum_wins', 0)
    swap_wins = v.get('swap', {}).get('sum_wins', 0)

    total_count = attack_count + swap_count
    count_list.append(total_count)

    if attack_count > 0:
        attack_win_percent = attack_wins / attack_count
    else:
        attack_win_percent = None

    if swap_count > 0:
        swap_win_percent = swap_wins / swap_count
    else:
        swap_win_percent = None

    if attack_win_percent is not None and swap_win_percent is not None and total_count > 100:
        if swap_win_percent - attack_win_percent > 0.05:
            print(k, np.round(attack_win_percent,3), np.round(swap_win_percent,3), attack_count, swap_count)

    
print(f"count statistics {np.mean(count_list):.3f} {np.std(count_list):.3f} {np.min(count_list)} {np.max(count_list)} {np.median(count_list)}")

# find number of counts >= x
count_array = np.array(count_list)
for x in [50, 75, 100, 150, 200]:
    print(x, count_array[count_array >= x].shape)

(5, 11, 8, 5, 3) 0.082 0.135 61 52
(8, 8, 9, 8, 9) 0.5 0.576 78 66
(6, 6, 11, 8, 8) 0.528 0.604 72 96
(5, 5, 11, 9, 9) 0.476 0.622 63 45
(5, 8, 11, 9, 8) 0.441 0.535 59 71
(5, 5, 11, 8, 8) 0.358 0.45 106 100
(8, 5, 10, 8, 9) 0.44 0.508 50 65
(8, 11, 8, 5, 5) 0.194 0.254 67 63
(5, 5, 9, 8, 5) 0.312 0.365 64 52
(6, 6, 8, 5, 11) 0.229 0.321 48 53
(8, 11, 8, 5, 6) 0.355 0.462 62 52
(8, 6, 11, 8, 8) 0.536 0.589 69 90
(8, 5, 11, 9, 8) 0.456 0.518 57 56
(6, 6, 5, 10, 9) 0.419 0.481 74 52
(5, 11, 8, 7, 3) 0.23 0.283 61 46
(8, 6, 10, 8, 9) 0.472 0.576 53 59
count statistics 26.636 64.517 1 1144 6.0
50 (5014,)
75 (3380,)
100 (2475,)
150 (1425,)
200 (883,)


In [139]:
# action_state_results_dict

In [95]:
state_key

({8}, {10})

#### Eval Agents Loop
* based on two agents and results, see how statistically signficant the difference is

0

In [91]:
num_battles = 10

winner_dict = {
    0:0,
    1:0,
    -1:0
}

pkm_env_action_dict = {
    0:{},
    1:{},
}

action_lookup_dict = copy.deepcopy(action_state_results_dict)

max_episode_steps = 250
agent_index = 0

lookup_dict = make_lookup_dict()
team_generator = RandomTeamGenerator(2)

time_int = int(time.time())
save_tag =  f"_smoke_test_winner_dict_{time_int}"
start_time = time.time()


for battle_idx in range(num_battles):
    
    agent_team = team_generator.get_team().get_battle_team([0, 1, ])
    opp_team = team_generator.get_team().get_battle_team([0, 1, ])

    # set new environment with teams
    env = PkmBattleEnv((agent_team, opp_team),
                   encode=(False, False)) 
 
    game_state, info = env.reset()

    # is_first_move = True
    # agent_first_move = None
    state_key = None

    for episode_step in range(max_episode_steps):

        agent_pre_env_action = 0
        agent_env_action, agent_team_best_damage_list = turn_agent_action_into_env_action(agent_pre_env_action, game_state[0])
        opp_action, opp_best_damage_list = get_best_active_damage_action(game_state[1])

        if opp_action < 0 or opp_action > 3:
            print("Error opp action is not 0 to 3")
        elif len(opp_best_damage_list) == 0:
            print("Error opp best damage list is empty")
        elif opp_best_damage_list[0] < 0:
            print("Error opp best damage is negative")
            
        if state_key is None:
            # for now just doing part dmg to opp
            #dmg_array = np.array(agent_best_damage_list + [opp_best_damage,])
            dmg_array = np.array(agent_team_best_damage_list)
            state_key = turn_game_state_into_dict_key(game_state[0], game_state[1], lookup_dict, dmg_array)

            # for now only override the initial action, could change this to possibly swap later as well
            if state_key in action_lookup_dict:
                agent_env_action = max(action_lookup_dict[state_key], key=action_lookup_dict[state_key].get)


        if agent_pre_env_action == 1 and agent_env_action != 4:
            print("Error agent action is 1 but env action is not 4 ")
        elif agent_pre_env_action == 0:
            if (agent_env_action < 0 or agent_env_action > 3):
                print("Error agent action is 0 but env action is not 0 to 3 ")
            elif len(agent_team_best_damage_list) == 0:
                print("Error agent action is 0 but best damage list is empty")
            elif agent_team_best_damage_list[0] < 0:
                print("Error agent action is 0 but best damage is negative")

        
   
        # enter action and step the env
        action_list = [agent_env_action, opp_action]

        pkm_env_action_dict = add_action_to_pkm_env_action_dict(agent_env_action, pkm_env_action_dict, 0)
        pkm_env_action_dict = add_action_to_pkm_env_action_dict(opp_action, pkm_env_action_dict, 1)

        #print(action_list)
        game_state, _not_used_reward, terminated, truncated, info = env.step(action_list)  # for inference, we don't need reward

        if episode_step == max_episode_steps - 1:
            print('Warning: max steps reached')
            terminated = True

        if terminated:
            winner = env.winner
            if winner == agent_index:
                win_int = 1
            else:
                win_int = 0

            if winner in winner_dict:
                winner_dict[winner] += 1

            break

end_time = time.time()
print(f"Time to run {(end_time - start_time) / 60:.3f} minutes")
print(f"Time to run {(end_time - start_time) / num_battles:.3f} seconds per battle")
print(f"Time to run {((end_time - start_time) / num_battles / 60 / 60) * 1000000:.3f} hours per million battles")

print(winner_dict)
print(pkm_env_action_dict)

save_object_as_pkl(winner_dict, 'eval_winner_dict_smoke_test')

Time to run 0.038 minutes
Time to run 0.005 seconds per battle
Time to run 1.281 hours per million battles
{0: 236, 1: 264, -1: 0}
{0: {3: 311, 1: 316, 0: 510, 2: 314}, 1: {0: 528, 3: 302, 1: 324, 2: 297}}


#### See if results significant
* should probably do this with ELO?

In [92]:
from scipy.stats import chi2_contingency

def chi_square_test(win_loss_draw1, win_loss_draw2):
    """
    Performs a Chi-square test on two entities with win, loss, and draw counts.
    
    Parameters:
    - win_loss_draw1: A list or tuple of win, loss, and draw counts for entity 1 (e.g., [wins, losses, draws]).
    - win_loss_draw2: A list or tuple of win, loss, and draw counts for entity 2.
    
    Returns:
    - Chi-square statistic, p-value, and interpretation as a string.
    """
    # Create a contingency table
    contingency_table = [win_loss_draw1, win_loss_draw2]
    
    # Perform the Chi-square test
    chi2, pval, dof, expected = chi2_contingency(contingency_table)
    
    # Interpret the results
    interpretation = ("There is a statistically significant difference in outcomes between the two entities."
                      if pval < 0.05 else
                      "There is no statistically significant difference in outcomes between the two entities.")
    
    print(f'Player 1 Win rate: { win_loss_draw1[0] / sum(win_loss_draw1):.3f}')


    print(f'Chi-square statistic: {chi2:.3f}')
    print(f'P-value: {pval:.5f}')

    return chi2, pval, interpretation

# # Example usage
#chi2, pval, interpretation = chi_square_test([120, 80, 50], [130, 70, 60])

chi2, pval, interpretation = chi_square_test([winner_dict[0],
                                              winner_dict[1],
                                              #winner_dict[-1]
                                              ],
                                             [winner_dict[1],
                                              winner_dict[0],
                                              #winner_dict[-1]
                                              ],)

Player 1 Win rate: 0.472
Chi-square statistic: 2.916
P-value: 0.08771


In [83]:
# test where it is significant

for x in range(150, 200, 5):
    winner_dict = {
        0:x,
        1:300-x,
        -1:0
    }
    print("--- x is ", x)
    chi2, pval, interpretation = chi_square_test([winner_dict[0],
                                                winner_dict[1],
                                                #winner_dict[-1]
                                                ],
                                                [winner_dict[1],
                                                winner_dict[0],
                                                #winner_dict[-1]
                                                ],)
    print("________")

--- x is  150
Player 1 Win rate: 0.500
Chi-square statistic: 0.000
P-value: 1.00000
________
--- x is  155
Player 1 Win rate: 0.517
Chi-square statistic: 0.540
P-value: 0.46243
________
--- x is  160
Player 1 Win rate: 0.533
Chi-square statistic: 2.407
P-value: 0.12082
________
--- x is  165
Player 1 Win rate: 0.550
Chi-square statistic: 5.607
P-value: 0.01789
________
--- x is  170
Player 1 Win rate: 0.567
Chi-square statistic: 10.140
P-value: 0.00145
________
--- x is  175
Player 1 Win rate: 0.583
Chi-square statistic: 16.007
P-value: 0.00006
________
--- x is  180
Player 1 Win rate: 0.600
Chi-square statistic: 23.207
P-value: 0.00000
________
--- x is  185
Player 1 Win rate: 0.617
Chi-square statistic: 31.740
P-value: 0.00000
________
--- x is  190
Player 1 Win rate: 0.633
Chi-square statistic: 41.607
P-value: 0.00000
________
--- x is  195
Player 1 Win rate: 0.650
Chi-square statistic: 52.807
P-value: 0.00000
________


In [68]:
from scipy.stats import ttest_ind


def compare_rates(successes_group1, observations_group1, successes_group2, observations_group2):
    """
    Compares two observed rates using a two-sample t-test and prints the t-statistic and p-value.
    
    Parameters:
    - successes_group1: Number of successes in group 1
    - observations_group1: Number of observations in group 1
    - successes_group2: Number of successes in group 2
    - observations_group2: Number of observations in group 2
    """
    # Calculate rates
    rate_group1 = successes_group1 / observations_group1
    rate_group2 = successes_group2 / observations_group2

    # Convert rates to "success" arrays
    data_group1 = np.array([1] * successes_group1 + [0] * (observations_group1 - successes_group1))
    data_group2 = np.array([1] * successes_group2 + [0] * (observations_group2 - successes_group2))

    # Perform the two-sample t-test
    stat, pval = ttest_ind(data_group1, data_group2)

    # Print rounded t-statistic and p-value
    print("Rates: {:.3f} vs. {:.3f}".format(rate_group1, rate_group2))
    print(f'T-statistic: {stat:.5f}')
    print(f'P-value: {pval:.5f}')

# Example usage
#compare_rates(45, 100, 50, 120)

compare_rates(winner_dict[0], num_battles, winner_dict[1], num_battles)

Rates: 0.540 vs. 0.460
T-statistic: 1.96261
P-value: 0.05015


In [70]:
for test_wins in [150, 160, 170, 175, 200, 225, 250, 275]:
    print(test_wins)
    compare_rates(test_wins, num_battles, num_battles-test_wins, num_battles)

150
Rates: 0.500 vs. 0.500
T-statistic: 0.00000
P-value: 1.00000
160
Rates: 0.533 vs. 0.467
T-statistic: 1.63390
P-value: 0.10281
170
Rates: 0.567 vs. 0.433
T-statistic: 3.28991
P-value: 0.00106
175
Rates: 0.583 vs. 0.417
T-statistic: 4.13349
P-value: 0.00004
200
Rates: 0.667 vs. 0.333
T-statistic: 8.64581
P-value: 0.00000
225
Rates: 0.750 vs. 0.250
T-statistic: 14.11855
P-value: 0.00000
250
Rates: 0.833 vs. 0.167
T-statistic: 21.87236
P-value: 0.00000
275
Rates: 0.917 vs. 0.083
T-statistic: 36.86585
P-value: 0.00000


In [36]:
print(episode_step)

3


In [31]:
env.step(action_list) 

((<vgc.engine.PkmBattleEnv.PkmBattleEnv at 0x17e31fafac0>,
  <vgc.engine.PkmBattleEnv.PkmBattleEnv at 0x17e31fa12e0>),
 [1.0, 1.0],
 True,
 False,
 {})

In [27]:
.266 / 60 * 1000000 /60

73.88888888888889

### Testing dict size with raw values
* so with 5m iters, like 4.7m different dict values
* so some grouping needs to be done

In [180]:
test_iters = 5000000

time_start = time.time()

raw_stats_dict = {}
team_generator = RandomTeamGenerator(2)

for i in range(test_iters):
    agent_team = team_generator.get_team().get_battle_team([0, 1, ])
    opp_team = team_generator.get_team().get_battle_team([0, 1, ])

    game_state, info = env.reset()

    env = PkmBattleEnv((agent_team, opp_team), encode=(False, False)) 

    game_state, info = env.reset()

    _agent_env_action, agent_team_best_damage_list = turn_agent_action_into_env_action(0, game_state[0])
    _opp_action, opp_best_damage_list = get_best_active_damage_action(game_state[1])

    hp_list = get_hp_list(game_state[0], game_state[1])
    #print(hp_list, agent_team_best_damage_list, opp_best_damage_list)

    state_tuple = tuple(hp_list + agent_team_best_damage_list + opp_best_damage_list)
    #print(state_tuple)

    if state_tuple in raw_stats_dict:
        raw_stats_dict[state_tuple] += 1
    else:
        raw_stats_dict[state_tuple] = 1

time_end = time.time()
print(f"Time to run {time_end - time_start:.3f} seconds")

print(len(raw_stats_dict))

save_object_as_pkl(raw_stats_dict, 'raw_state_dict_smoke_test')


Time to run 10956.973 seconds
4750104


In [181]:
10956.973 /60 /60

3.043603611111111

In [182]:
raw_hp_act_dict = copy.deepcopy(raw_stats_dict)

In [None]:
# see the distribution of HP values and dmg values

In [184]:
for k, v in raw_hp_act_dict.items():
    print(k,v)
    break

(336.0, 120.0, 120.0, 102.0, 282.0, 630.0, 261.0) 1


In [189]:
# first 3 are HP, next four are dmg
hp_dict_count = {}
dmg_dict_count = {}
hp_list = []
dmg_list = []

for k, _ in raw_hp_act_dict.items():
    hp_key = k[:3]
    dmg_key = k[3:]

    for hp_value in hp_key:
        if hp_value in hp_dict_count:
            hp_dict_count[hp_value] += 1
        else:
            hp_dict_count[hp_value] = 1
        hp_list.append(hp_value)

    for dmg_value in dmg_key:
        if dmg_value in dmg_dict_count:
            dmg_dict_count[dmg_value] += 1
        else:
            dmg_dict_count[dmg_value] = 1
        dmg_list.append(dmg_value)
    

print(hp_dict_count)
print(dmg_dict_count)

{336.0: 484424, 120.0: 3779517, 156.0: 3378332, 264.0: 1144966, 192.0: 2453402, 228.0: 1692512, 408.0: 164303, 300.0: 758680, 372.0: 296296, 480.0: 22651, 444.0: 75229}
{102.0: 602589, 282.0: 644930, 630.0: 186926, 261.0: 947623, 423.0: 284529, 204.0: 680716, 132.0: 315649, 153.0: 904877, 414.0: 382101, 315.0: 693529, 99.0: 237773, 354.0: 175684, 210.0: 1395427, 138.0: 1062649, 211.5: 85819, 207.0: 1101337, 492.0: 287177, 564.0: 175764, 348.0: 629654, 198.0: 368290, 174.0: 1386176, 276.0: 798295, 66.0: 172952, 246.0: 978168, 369.0: 463244, 141.0: 153193, 420.0: 441586, 306.0: 491293, 130.5: 136611, 390.0: 53556, 318.0: 374454, 159.0: 104128, 105.0: 198419, 238.5: 48117, 522.0: 278020, 123.0: 165291, 738.0: 120375, 184.5: 125389, 103.5: 137683, 585.0: 21952, 846.0: 73532, 265.5: 22306, 477.0: 158584, 195.0: 16064, 87.0: 86796, 60.0: 31486, 90.0: 57314, 177.0: 51219, 636.0: 96930, 76.5: 66736, 531.0: 73829, 954.0: 40382, 45.0: 30880, 708.0: 44751, 157.5: 167336, 780.0: 13565, 1062.0: 186

##### notes
* so very few HP values
* also not that many low dmg values

In [197]:
pprint.pprint(hp_dict_count)

{120.0: 3779517,
 156.0: 3378332,
 192.0: 2453402,
 228.0: 1692512,
 264.0: 1144966,
 300.0: 758680,
 336.0: 484424,
 372.0: 296296,
 408.0: 164303,
 444.0: 75229,
 480.0: 22651}


In [None]:
'''
potential HP dict keys
400 and over
320 to 400
240 to 320
160 to 240
80 to 160
0 to 80


dmg values
0 to 80
80 to 160
160 to 240
240 to 320
320 to 400
400 to 480
480 to 560
560 +


5x5 would be like
0 to 96
96 to 192
192 to 288
288 to 384
384 to 480

then for dmg values could do like
0 to 120
120 to 240
240 to 360
360 to 480
480 +

'''

In [206]:
480 / 5

96.0

In [208]:
print(6**3 * 8**4)
print(6*6*6*8*8*8*8)

print(5**4 * 5**6)

print(4**4 * 4**6)

884736
884736
9765625
1048576


In [205]:


for a in range(3,9):
    for b in range(3,10):
        print(a, b, (a**3)*(b**4))

884736
884736
3 3 2187
3 4 6912
3 5 16875
3 6 34992
3 7 64827
3 8 110592
3 9 177147
4 3 5184
4 4 16384
4 5 40000
4 6 82944
4 7 153664
4 8 262144
4 9 419904
5 3 10125
5 4 32000
5 5 78125
5 6 162000
5 7 300125
5 8 512000
5 9 820125
6 3 17496
6 4 55296
6 5 135000
6 6 279936
6 7 518616
6 8 884736
6 9 1417176
7 3 27783
7 4 87808
7 5 214375
7 6 444528
7 7 823543
7 8 1404928
7 9 2250423
8 3 41472
8 4 131072
8 5 320000
8 6 663552
8 7 1229312
8 8 2097152
8 9 3359232


In [199]:
pprint.pprint(dmg_dict_count)

{0.0: 216,
 15.0: 746,
 22.5: 1014,
 30.0: 12627,
 33.0: 8382,
 45.0: 30880,
 49.5: 11328,
 51.0: 27591,
 60.0: 31486,
 66.0: 172952,
 69.0: 63996,
 76.5: 66736,
 87.0: 86796,
 90.0: 57314,
 99.0: 237773,
 102.0: 602589,
 103.5: 137683,
 105.0: 198419,
 123.0: 165291,
 130.5: 136611,
 132.0: 315649,
 138.0: 1062649,
 141.0: 153193,
 153.0: 904877,
 157.5: 167336,
 159.0: 104128,
 174.0: 1386176,
 177.0: 51219,
 184.5: 125389,
 195.0: 16064,
 198.0: 368290,
 204.0: 680716,
 207.0: 1101337,
 210.0: 1395427,
 211.5: 85819,
 238.5: 48117,
 246.0: 978168,
 261.0: 947623,
 265.5: 22306,
 276.0: 798295,
 282.0: 644930,
 292.5: 6595,
 306.0: 491293,
 315.0: 693529,
 318.0: 374454,
 348.0: 629654,
 354.0: 175684,
 369.0: 463244,
 390.0: 53556,
 414.0: 382101,
 420.0: 441586,
 423.0: 284529,
 477.0: 158584,
 492.0: 287177,
 522.0: 278020,
 531.0: 73829,
 564.0: 175764,
 585.0: 21952,
 630.0: 186926,
 636.0: 96930,
 708.0: 44751,
 738.0: 120375,
 780.0: 13565,
 846.0: 73532,
 954.0: 40382,
 1062.

In [191]:
print(len(hp_dict_count), len(dmg_dict_count))
print(11*11*11 * 67*67*67*67)

11 67
20152452


In [195]:
print(11**6 + 67**18 )

740195513856780056217081019504370


In [187]:
# can then put into pandas and see the distribution of values

(336.0, 120.0, 120.0)