In [5]:
import numpy as np
import pickle
import os
import copy
from scipy.stats import chi2_contingency
import time
import pprint

In [6]:
'''

'''

'\n\n'

### Constants and Functions

In [17]:
time_int = int(time.time())
swap_dict_save_tag = f"swap_recommended_dict_{time_int}"

In [18]:
def load_pkl_object(pkl_path):
    '''
    Load a pickle object
    '''
    with open(pkl_path, 'rb') as handle:
        return pickle.load(handle)


def save_object_as_pkl(object_to_save, save_tag):
    '''
    Save object a pickle file
    '''
    save_path = f'swap_dict_results\\{save_tag}.pickle'

    with open(save_path, 'wb') as handle:
        print("saving: ", save_path)
        pickle.dump(object_to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)


def get_chi_square_test_from_count_wins(
    swap_wins, swap_count, attack_wins, attack_count,
    min_total_count=200,
    min_swap_count=100,
    min_attack_count=100,
    is_print_statistics=False):

    attack_action = 0
    swap_party_zero_action = 1

    is_use_p_value = False
    is_swap_better = False
    p_value = None
    swap_win_rate_better_rate = 0.
    recommended_action = attack_action

    try:
        total_count = swap_count + attack_count

        if total_count > min_total_count and swap_count > min_swap_count and attack_count > min_attack_count:

            swap_win_percent = swap_wins / swap_count
            attack_win_percent = attack_wins / attack_count
            
            if swap_win_percent > attack_win_percent:
                is_swap_better = True
                swap_win_rate_better_rate = swap_win_percent - attack_win_percent
            else:
                is_swap_better = False
                swap_win_rate_better_rate = 0.

            # chi squared table breaks down if any 0 values
            # really should not have less than 5
            if attack_wins == attack_count:
                recommended_action = attack_action
                # choose attack as attack always wins
                if is_print_statistics:
                    print("Attack always wins")
                    print(f"Swap win rate: {swap_wins / swap_count:.3f} | Count {swap_count}")
                    print(f"Attack win rate: {attack_wins / attack_count:.3f} | Count {attack_count}")
            elif swap_wins == swap_count:
                # choose swap
                is_use_p_value = True
                is_swap_better = True
                p_value = 0.
                recommended_action = swap_party_zero_action
                if is_print_statistics:
                    print("swap always wins, choosing swap")
                    print(f"Swap win rate: {swap_wins / swap_count:.3f} | Count {swap_count}")
                    print(f"Attack win rate: {attack_wins / attack_count:.3f} | Count {attack_count}")
            elif swap_wins == 0:
                recommended_action = attack_action
                # swap always loses
                if is_print_statistics:
                    print("Swap always loses")
                    print(f"Swap win rate: {swap_wins / swap_count:.3f} | Count {swap_count}")
                    print(f"Attack win rate: {attack_wins / attack_count:.3f} | Count {attack_count}")
            elif attack_wins == 0:
                # attack always loses and swap won at least once so choose swap
                is_use_p_value = True
                is_swap_better = True
                p_value = 0.
                recommended_action = swap_party_zero_action
                if is_print_statistics:
                    print("Attack always loses, choosing swap ")
                    print(f"Swap win rate: {swap_wins / swap_count:.3f} | Count {swap_count}")
                    print(f"Attack win rate: {attack_wins / attack_count:.3f} | Count {attack_count}")
            else:
                contingency_table = [[swap_wins, swap_count - swap_wins], [attack_wins, attack_count - attack_wins]]
                chi2, p_value, dof, expected = chi2_contingency(contingency_table)
                is_use_p_value = True

                if is_swap_better:
                    if p_value < 0.05:
                        recommended_action = swap_party_zero_action
                    # elif swap_win_rate_better_rate >= .1:
                    #     recommended_action = swap_party_zero_action
                    elif swap_win_rate_better_rate >= .05 and p_value < .1:
                        recommended_action = swap_party_zero_action
                    # if p_value < 0.25:
                    #     recommended_action = swap_party_zero_action
                    # elif swap_win_rate_better_rate >= .1:
                    #     recommended_action = swap_party_zero_action
                    # elif swap_win_rate_better_rate >= .05 and p_value < .6:
                    #     recommended_action = swap_party_zero_action

                if is_print_statistics:
                    #print(f'Swap Win : { win_loss_draw1[0] / sum(win_loss_draw1):.3f}')
                    print(f"Swap win rate: {swap_wins / swap_count:.3f} | Count {swap_count}")
                    print(f"Attack win rate: {attack_wins / attack_count:.3f} | Count {attack_count}")
                    print(f'Chi-square statistic: {chi2:.3f}')
                    print(f'P-value: {p_value:.5f}')

        else:
            is_use_p_value = False
            is_swap_better = False
            p_value = None
            swap_win_rate_better_rate = 0.
            recommended_action = attack_action

    except Exception as e:
        print("Error: in chi square test ", str(e) )
        is_use_p_value = False
        is_swap_better = False
        p_value = None
        swap_win_rate_better_rate = 0.
        recommended_action = attack_action
    
    return recommended_action, swap_win_rate_better_rate, is_use_p_value, is_swap_better, p_value



### create action dict

In [19]:
results_dict = load_pkl_object('G:\\3v3_vgc_saves_71124\\3v3_results\\v2_mc_test_10_1720982160_action_state_results_dict.pickle')

print(len(results_dict))
for k, v in results_dict.items():
    print(k, v)
    break

676
(2, 2, 2, -1, -1, -1, 0, 3, 0, 1, 0, 0) {'attack': {'count': 15, 'sum_wins': 5}, 'swap_0': {'count': 14, 'sum_wins': 2}, 'swap_1': {'count': 14, 'sum_wins': 5}}


In [20]:

def get_create_swap_dict(results_dict, save_tag, min_swap_count=100):
    count_key = 'count'
    sum_wins_key = 'sum_wins'
    agent_first_move_attack_key = 'attack'
    agent_first_move_swap_party_0_key = 'swap_0'
    agent_first_move_swap_party_1_key = 'swap_1'

    time_int = int(time.time())

    swap_dict = {}
    swap_better_count = 0

    try:
        for state_key, moves_dict in results_dict.items():

            if ( agent_first_move_swap_party_0_key in moves_dict or agent_first_move_swap_party_1_key in moves_dict ) \
                and agent_first_move_attack_key in moves_dict:

                # get wins, counts for move
                attack_count = moves_dict.get(agent_first_move_attack_key, {}).get(count_key, 0)
                attack_wins = moves_dict.get(agent_first_move_attack_key, {}).get(sum_wins_key, 0)

                swap_0_count = moves_dict.get(agent_first_move_swap_party_0_key, {}).get(count_key, 0)
                swap_0_wins = moves_dict.get(agent_first_move_swap_party_0_key, {}).get(sum_wins_key, 0)

                swap_1_count = moves_dict.get(agent_first_move_swap_party_1_key, {}).get(count_key, 0)
                swap_1_wins = moves_dict.get(agent_first_move_swap_party_1_key, {}).get(sum_wins_key, 0)
                

                if swap_0_count < min_swap_count and swap_1_count >= min_swap_count:
                    best_swap_count = swap_1_count
                    best_swap_wins = swap_1_wins
                    potential_recommended_swap_action = 2
                elif swap_1_count < min_swap_count and swap_0_count >= min_swap_count:
                    best_swap_count = swap_0_count
                    best_swap_wins = swap_0_wins
                    potential_recommended_swap_action = 1
                elif swap_1_count >= min_swap_count and swap_0_count >= min_swap_count:
                    swap_0_win_rate = swap_0_wins / (swap_0_count)
                    swap_1_win_rate = swap_1_wins / (swap_1_count)

                    if swap_0_win_rate >= swap_1_win_rate:
                        best_swap_count = swap_0_count
                        best_swap_wins = swap_0_wins
                        potential_recommended_swap_action = 1
                    else:
                        best_swap_count = swap_1_count
                        best_swap_wins = swap_1_wins
                        potential_recommended_swap_action = 2
                else:
                    best_swap_count = 0
                    best_swap_wins = 0
                    potential_recommended_swap_action = 0

                recommended_action, swap_win_rate_better_rate, is_use_p_value, is_swap_better, p_value = get_chi_square_test_from_count_wins(
                    best_swap_wins, best_swap_count, attack_wins, attack_count,
                    min_total_count=min_swap_count*2,
                    min_swap_count=min_swap_count,
                    min_attack_count=min_swap_count,
                    is_print_statistics=False)

            
                if is_swap_better:

                    swap_better_count += 1

                    if recommended_action != 0:
                        # above looked to see if the best swap is better than attack
                        # here assign the actual swap action to the lookup dict
                        actual_recommended_action = potential_recommended_swap_action
                    else:
                        actual_recommended_action = 0

                    if actual_recommended_action != 0:
                        swap_dict[state_key] = {
                            'recommended_action': actual_recommended_action,
                            'swap_win_rate_better_rate': swap_win_rate_better_rate,
                            'is_use_p_value': is_use_p_value,
                            'is_swap_better': is_swap_better,
                            'p_value': p_value,
                            'best_swap_count': best_swap_count,
                            'attack_count': attack_count,
                            'best_swap_wins': best_swap_wins,
                            'attack_wins': attack_wins,
                        }


    except Exception as e:
        print("Error in iterating through dict: ", str(e), state_key)

    save_swap_function_tag = f'swap_{save_tag}_dict_{time_int}'
    save_object_as_pkl(swap_dict, save_swap_function_tag)

    print(f"swap better count: {swap_better_count} | len swap dict {len(swap_dict)}")

    return swap_dict, save_swap_function_tag


In [31]:
swap_dict, after_function_swap_save_tag = get_create_swap_dict(results_dict, swap_dict_save_tag, min_swap_count=100)
# print("SET VALUE BACK AFTER DONE TESTING")
print(len(swap_dict), len(results_dict))

saving:  swap_dict_results\swap_swap_recommended_dict_1720983431_dict_1720984374.pickle
swap better count: 72 | len swap dict 28
SET VALUE BACK AFTER DONE TESTING
28 676


In [33]:
for k, v in swap_dict.items():
    print(k, v)
    break

(3, 2, 2, -1, -1, -1, 0, 3, 0, 1, 0, 0) {'recommended_action': 2, 'swap_win_rate_better_rate': 0.5, 'is_use_p_value': True, 'is_swap_better': True, 'p_value': 0.0, 'best_swap_count': 2, 'attack_count': 2, 'best_swap_wins': 2, 'attack_wins': 1}


### Evaluate

In [34]:
import numpy as np
import time
import copy

from vgc.datatypes.Objects import PkmTeam, Pkm, GameState, Weather
from vgc.engine.PkmBattleEnv import PkmBattleEnv
from vgc.util.generator.PkmTeamGenerators import RandomTeamGenerator

from vgc.datatypes.Constants import TYPE_CHART_MULTIPLIER, MAX_HIT_POINTS, MOVE_MAX_PP, DEFAULT_TEAM_SIZE
from vgc.datatypes.Objects import PkmMove, Pkm, PkmTeam, GameState, Weather
from vgc.datatypes.Types import PkmStat, PkmType, WeatherCondition, \
    N_TYPES, N_STATUS, N_STATS, N_ENTRY_HAZARD, N_WEATHER, PkmStatus, PkmEntryHazard


import pprint
import pickle

from pkm_battle_env_wrapper import NiBot, SimpleBot
from vgc.behaviour.BattlePolicies import RandomPlayer
import importlib

from scipy.stats import chi2_contingency

In [35]:
def chi_square_test(winner_dict, num_battles):
    contingency_table = [[winner_dict[0], winner_dict[1]], [winner_dict[1], winner_dict[0]]]
    chi2, p_value, dof, expected = chi2_contingency(contingency_table)
    print(f'Chi-square statistic: {chi2:.3f}')
    print(f'P-value: {p_value:.5f}')
    print(f'team_1 win rate: {winner_dict[0] / num_battles:.3f}')
    print(f'team_2 win rate: {winner_dict[1] / num_battles:.3f}')

In [36]:
import pequil_bot_battle_policy_v2

importlib.reload(pequil_bot_battle_policy_v2)
from pequil_bot_battle_policy_v2 import PequilBotV2

pequil_bot_v2 = PequilBotV2()

In [37]:
num_battles = 1000

RANDOM_BOT_NAME = 'random'
NIBOT_NAME = 'nibot'
SIMPLE_BOT_NAME = 'simple'
PEQUIL_BOT_NAME = 'pequil'

is_pequil_bot_team_1 = True
opponent_bot = SIMPLE_BOT_NAME #RANDOM_BOT_NAME #SIMPLE_BOT_NAME #NIBOT_NAME#RANDOM_BOT_NAME

winner_dict = {
    0:0,
    1:0,
    -1:0
}

pkm_env_action_dict = {
    0:{
        0:0,
        1:0,
        2:0,
        3:0,
        4:0,
        5:0,
    },
    1:{
        0:0,
        1:0,
        2:0,
        3:0,
        4:0,
        5:0,
    },
}

action_state_results_dict = {}

max_episode_steps = 250
agent_index = 0

team_generator = RandomTeamGenerator(2)

time_int = int(time.time())
# save_tag =  f"_smoke_test_{time_int}"
start_time = time.time()

pequil_bot_v2 = PequilBotV2()
random_agent = RandomPlayer()
nibot = NiBot()
simple_bot = SimpleBot()

for battle_idx in range(num_battles):
    
    team_1 = team_generator.get_team().get_battle_team([0, 1, 2])
    team_2 = team_generator.get_team().get_battle_team([0, 1, 2])

    

    # set new environment with teams
    env = PkmBattleEnv((team_1, team_2),
                   encode=(False, False)) 

    game_state, info = env.reset()

    is_first_move = True
    agent_first_move = None
    state_key = None

    for episode_step in range(max_episode_steps):
        
        if is_pequil_bot_team_1:
            team_1_action = pequil_bot_v2.get_action(game_state[0])

            if opponent_bot == NIBOT_NAME:
                team_2_action = nibot.get_action(game_state[1])
            elif opponent_bot == SIMPLE_BOT_NAME:
                team_2_action = simple_bot.get_action(game_state[1])
            else:
                team_2_action = random_agent.get_action(game_state[1])
        else:
            if opponent_bot == NIBOT_NAME:
                team_1_action = nibot.get_action(game_state[0])
            elif opponent_bot == SIMPLE_BOT_NAME:
                team_1_action = simple_bot.get_action(game_state[0])
            else:
                team_1_action = random_agent.get_action(game_state[0])

            team_2_action = pequil_bot_v2.get_action(game_state[1])

        # enter action and step the env
        action_list = [team_1_action, team_2_action]
        pkm_env_action_dict[0][team_1_action] += 1
        pkm_env_action_dict[1][team_2_action] += 1

        game_state, _not_used_reward, terminated, truncated, info = env.step(action_list)  # for inference, we don't need reward

        if episode_step == max_episode_steps - 1:
            print('Warning: max steps reached')
            terminated = True

        if terminated:
            winner = env.winner
            winner_dict[winner] += 1
            break


pprint.pprint(winner_dict)
pprint.pprint(pkm_env_action_dict)
# print(action_dict)


Recommended action taken  (2, 3, 2, -1, -1, -1, 0, 3, 0, 2, 0, 0)
{'recommended_action': 2, 'swap_win_rate_better_rate': 0.5, 'is_use_p_value': True, 'is_swap_better': True, 'p_value': 0.0, 'best_swap_count': 2, 'attack_count': 3, 'best_swap_wins': 1, 'attack_wins': 0}
Recommended action taken  (3, 1, 2, -1, -1, -1, 0, 3, 0, 0, 0, 1)
{'recommended_action': 1, 'swap_win_rate_better_rate': 0.4, 'is_use_p_value': True, 'is_swap_better': True, 'p_value': 0.0, 'best_swap_count': 5, 'attack_count': 5, 'best_swap_wins': 2, 'attack_wins': 0}
Recommended action taken  (2, 2, 1, -1, -1, -1, 0, 3, 1, 1, 0, 0)
{'recommended_action': 2, 'swap_win_rate_better_rate': 0.36601307189542487, 'is_use_p_value': True, 'is_swap_better': True, 'p_value': 0.0623304176614604, 'best_swap_count': 17, 'attack_count': 18, 'best_swap_wins': 10, 'attack_wins': 4}
Recommended action taken  (2, 3, 2, -1, -1, -1, 0, 3, 0, 2, 0, 0)
{'recommended_action': 2, 'swap_win_rate_better_rate': 0.5, 'is_use_p_value': True, 'is_sw

In [38]:


chi_square_test(winner_dict, num_battles)


Chi-square statistic: 0.578
P-value: 0.44710
team_1 win rate: 0.509
team_2 win rate: 0.491
