In [5]:
# imports

import asyncio
import json
import os
import matplotlib
import neptune
import nest_asyncio
import numpy as np
import pandas as pd
import time

from collections import defaultdict
from datetime import date
from itertools import product
from matplotlib import pyplot
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.player.battle_order import ForfeitBattleOrder
from poke_env.player.player import Player
from scipy.interpolate import griddata
from poke_env.player import Gen8EnvSinglePlayer, RandomPlayer
from poke_env import AccountConfiguration, ShowdownServerConfiguration
from poke_env.player import player as Sampleplayer
# from src.PlayerQLearning import Player as PlayerQLearning


In [6]:
# global configs

# Feature flags for the notebook run.
debug = True
save_to_json_file = True
use_validation = True
use_neptune = False

# Allow nested event loops so `await`/asyncio code can run inside the notebook kernel.
nest_asyncio.apply()
# Fix numpy's global RNG so random tie-breaking is reproducible.
np.random.seed(0)

if use_neptune:
    # SECURITY: the API token must never be committed with the notebook. It is
    # read from the NEPTUNE_API_TOKEN environment variable instead; any token
    # that was previously stored in this cell should be revoked and regenerated.
    run = neptune.init_run(
        project='jukainite/pokeREL',
        api_token=os.environ.get("NEPTUNE_API_TOKEN"),
        tags=["Q-learning FA team2 vs team1"],
    )


In [7]:
# Generation 9 game data; its type chart is passed to the damage-multiplier
# computation when the battle state is embedded.
from poke_env.data import GenData
GEN_9_DATA = GenData.from_gen(9)

In [8]:
# Definition of opponent's team (Pokémon Showdown template)

OP_TEAM = """
Iron Jugulis @ Booster Energy  
Ability: Quark Drive  
Tera Type: Steel  
EVs: 4 Atk / 252 SpA / 252 Spe  
Naive Nature  
- Knock Off  
- Hurricane  
- Earth Power  
- Taunt  

Roaring Moon @ Booster Energy  
Ability: Protosynthesis  
Tera Type: Flying  
EVs: 252 Atk / 4 Def / 252 Spe  
Jolly Nature  
- Dragon Dance  
- Acrobatics  
- Knock Off  
- Taunt    

TWINKATON (Tinkaton) @ Air Balloon  
Ability: Pickpocket  
Tera Type: Water  
EVs: 248 HP / 24 SpD / 236 Spe  
Jolly Nature  
- Stealth Rock  
- Play Rough  
- Thunder Wave  
- Encore  

Iron Moth @ Booster Energy  
Ability: Quark Drive  
Tera Type: Fairy  
EVs: 124 Def / 132 SpA / 252 Spe  
Timid Nature  
IVs: 0 Atk  
- Fiery Dance  
- Sludge Wave  
- Psychic  
- Dazzling Gleam  

Great Tusk @ Booster Energy  
Ability: Protosynthesis  
Tera Type: Poison  
EVs: 252 HP / 4 Def / 252 Spe  
Jolly Nature  
- Headlong Rush  
- Ice Spinner  
- Bulk Up  
- Rapid Spin  

Dragapult @ Choice Specs  
Ability: Clear Body  
Tera Type: Ghost  
EVs: 252 SpA / 4 SpD / 252 Spe  
Timid Nature  
- Draco Meteor  
- Shadow Ball  
- Thunderbolt  
- U-turn  

"""


In [9]:
# Definition of agent's team (Pokémon Showdown template)

OUR_TEAM= """
Daenerys (Kingambit) (F) @ Leftovers  
Ability: Supreme Overlord  
Tera Type: Dark  
EVs: 160 HP / 252 Atk / 96 Spe  
Adamant Nature  
- Kowtow Cleave  
- Iron Head  
- Sucker Punch  
- Swords Dance  

Kristine (Cinderace) (F) @ Heavy-Duty Boots  
Ability: Blaze  
Shiny: Yes  
Tera Type: Flying  
EVs: 144 HP / 112 Atk / 252 Spe  
Jolly Nature  
- Pyro Ball  
- Will-O-Wisp  
- Court Change  
- U-turn  

Homelandor (Landorus-Therian) @ Rocky Helmet  
Ability: Intimidate  
Shiny: Yes  
Tera Type: Dragon  
EVs: 248 HP / 244 Def / 16 Spe  
Bold Nature  
- Earth Power  
- Taunt  
- Stealth Rock  
- U-turn  

WALL-Y (Iron Valiant) @ Booster Energy  
Ability: Quark Drive  
Tera Type: Ghost  
EVs: 176 Atk / 80 SpA / 252 Spe  
Naive Nature  
- Moonblast  
- Close Combat  
- Knock Off  
- Encore  

Mr. Freeze (Kyurem) @ Choice Specs  
Ability: Pressure  
Shiny: Yes  
Tera Type: Ice  
EVs: 4 Def / 252 SpA / 252 Spe  
Timid Nature  
IVs: 0 Atk  
- Draco Meteor  
- Freeze-Dry  
- Earth Power  
- Blizzard  

SlodogChillionaire (Slowking-Galar) (M) @ Heavy-Duty Boots  
Ability: Regenerator  
Shiny: Yes  
Tera Type: Water  
EVs: 248 HP / 8 Def / 252 SpD  
Sassy Nature  
IVs: 0 Atk / 0 Spe  
- Toxic  
- Future Sight  
- Surf  
- Chilly Reception  
"""


In [10]:
# Size of the state embedding and of the feature vector (state + chosen action).
N_STATE_COMPONENTS = 12
N_FEATURES = N_STATE_COMPONENTS + 1

# Action space: the first 4 action ids are moves, the next 5 are switches.
N_OUR_MOVE_ACTIONS = 4
N_OUR_SWITCH_ACTIONS = 5
N_OUR_ACTIONS = N_OUR_MOVE_ACTIONS + N_OUR_SWITCH_ACTIONS

ALL_OUR_ACTIONS = np.arange(N_OUR_ACTIONS)

# Species name -> numeric id for the opponent's team (ids follow list order).
NAME_TO_ID_DICT_OP = {
    species: idx
    for idx, species in enumerate([
        "ironjugulis",
        "roaringmoon",
        "tinkaton",
        "ironmoth",
        "greattusk",
        "dragapult",
    ])
}

# Species name -> numeric id for our own team (ids follow list order).
NAME_TO_ID_DICT = {
    species: idx
    for idx, species in enumerate([
        "kingambit",
        "cinderace",
        "landorustherian",
        "ironvaliant",
        "kyurem",
        "slowkinggalar",
    ])
}

# Combined lookup covering both teams. Note that both teams use ids 0-5, so an
# id is only meaningful together with the side it belongs to.
NAME_TO_ID = {**NAME_TO_ID_DICT, **NAME_TO_ID_DICT_OP}

In [11]:
# Definition of Q-Learning with function approximation validation player

class ValidationPlayer(Player):
    """Greedy player that acts on a linear Q-function approximation.

    For every action id in ``range(N_OUR_ACTIONS)`` the player evaluates
    ``dot(x(state, action), w)`` and plays an action with the highest value,
    breaking ties at random. If that action cannot be mapped to an available
    move or switch, a random order is issued instead.
    """

    def __init__(self, battle_format, team, w, account_configuration=None):
        """Create the player.

        Args:
            battle_format: battle format string forwarded to ``Player``.
            team: team definition forwarded to ``Player``.
            w: weight vector used by ``q_approx``; it is dotted with the
                feature vector built by ``x`` (state components + action).
            account_configuration: optional ``AccountConfiguration``. When
                omitted, the user name is read from ``SHOWDOWN_USERNAME``
                (default ``"QlearningAgent"``) and the password from
                ``SHOWDOWN_PASSWORD`` (default ``None``).
        """
        if account_configuration is None:
            # SECURITY: credentials come from the environment so that no
            # password is stored in the notebook. A password that was
            # previously committed here should be changed.
            account_configuration = AccountConfiguration(
                os.environ.get("SHOWDOWN_USERNAME", "QlearningAgent"),
                os.environ.get("SHOWDOWN_PASSWORD"),
            )
        super().__init__(
            battle_format=battle_format,
            team=team,
            account_configuration=account_configuration,
        )
        # server_configuration=ShowdownServerConfiguration,)
        self.w = w

    def choose_move(self, battle):
        """Return the order for the greedy action in the current battle state."""
        state = self.embed_battle(battle)
        # let's get the greedy action. Ties must be broken arbitrarily
        q_values = np.array(
            [self.q_approx(state, action, self.w) for action in range(N_OUR_ACTIONS)]
        )
        # np.where yields indices >= 0, so `action` is always a valid action id.
        action = np.random.choice(np.where(q_values == q_values.max())[0])

        # Action ids below N_OUR_MOVE_ACTIONS index into the available moves.
        if (
            action < N_OUR_MOVE_ACTIONS
            and action < len(battle.available_moves)
            and not battle.force_switch
        ):
            return self.create_order(battle.available_moves[action])

        # The remaining action ids index into the available switches.
        switch_index = action - N_OUR_MOVE_ACTIONS
        if 0 <= switch_index < len(battle.available_switches):
            return self.create_order(battle.available_switches[switch_index])

        # if the selected action is not possible, perform a random move instead
        return self.choose_random_move(battle)

    def _battle_finished_callback(self, battle):
        """Nothing to do when a battle ends."""
        pass

    ''' Helper functions '''

    # feature vector
    @staticmethod
    def x(state, action):
        """Build the feature vector: the state cast to float, followed by the action id."""
        state = np.array(state).astype(float)
        return np.append(state, action)

    # q^(S, A, W)
    def q_approx(self, state, action, w):
        """Return the approximate action value ``dot(x(state, action), w)``."""
        # `x` already converts the state to a float array.
        return np.dot(self.x(state, action), w)

    # the embed battle is our state
    # 12 factors: our active mon, opponent's active mon, 4 moves base power, 4 moves multipliers, fainted mons per side
    @staticmethod
    def embed_battle(battle):
        """Embed a battle into the 12-component state list.

        Layout: id of our active Pokémon, id of the opponent's active Pokémon,
        4 move base powers (divided by 100), 4 move damage multipliers, number
        of fainted Pokémon on our team, number of fainted Pokémon on the
        opponent's team. Base powers and multipliers are stored as strings
        formatted with two decimals; ``x`` converts them back to floats.

        Raises:
            KeyError: if either active Pokémon's species is not in ``NAME_TO_ID``.
        """
        # -1 indicates that the move does not have a base power
        # or is not available
        moves_base_power = -np.ones(N_OUR_MOVE_ACTIONS)
        moves_dmg_multiplier = np.ones(N_OUR_MOVE_ACTIONS)
        # Only the first N_OUR_MOVE_ACTIONS moves have a slot in the state.
        for i, move in enumerate(battle.available_moves[:N_OUR_MOVE_ACTIONS]):
            moves_base_power[i] = (
                    move.base_power / 100
            )  # Simple rescaling to facilitate learning
            if move.type:
                moves_dmg_multiplier[i] = move.type.damage_multiplier(
                    battle.opponent_active_pokemon.type_1,
                    battle.opponent_active_pokemon.type_2,
                    type_chart=GEN_9_DATA.type_chart
                )

        # We count how many pokemons have fainted in each team
        fainted_mon_team = (
            len([mon for mon in battle.team.values() if mon.fainted])
        )
        fainted_mon_opponent = (
            len([mon for mon in battle.opponent_team.values() if mon.fainted])
        )

        state = list()
        state.append(NAME_TO_ID[str(battle.active_pokemon.species)])
        state.append(NAME_TO_ID[str(battle.opponent_active_pokemon.species)])
        for move_base_power in moves_base_power:
            state.append('{0:.2f}'.format(move_base_power))
        for move_dmg_multiplier in moves_dmg_multiplier:
            state.append('{0:.2f}'.format(move_dmg_multiplier))
        state.append(fainted_mon_team)
        state.append(fainted_mon_opponent)

        return state


In [12]:
# global parameters

# number of battles (episodes) to try
n_battles_array = [10000]
# N0 values for the exploration schedule from MC: epsilon(t) = N0 / (N0 + N(S(t)))
n0_array = [0.0001, 0.001, 0.01]
# initial learning rates (alpha0) to try
alpha0_array = [0.01]
# discount factors (gamma) to try
gamma_array = [0.75]


# One dict per combination of the values above (cartesian product).
list_of_params = [
    dict(zip(('n_battles', 'n0', 'alpha0', 'gamma'), combination))
    for combination in product(n_battles_array, n0_array, alpha0_array, gamma_array)
]


In [13]:
# json helper functions

def save_array_to_json(path_dir, filename, data):
    """Write ``data`` as JSON to ``path_dir/filename``, creating the directory if needed.

    Args:
        path_dir: target directory; created (including parents) when missing.
        filename: name of the JSON file inside ``path_dir``.
        data: a list, or any object exposing ``tolist()`` (e.g. a numpy array);
            other JSON-serialisable values such as tuples are written as-is.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(path_dir, exist_ok=True)
    full_filename = os.path.join(path_dir, filename)
    # Only call tolist() on objects that actually provide it.
    payload = data.tolist() if hasattr(data, "tolist") else data
    # write (the context manager closes the file)
    with open(full_filename, "w") as file:
        json.dump(payload, file)


def save_dict_to_json(path_dir, filename, data, append=True):
    """Write the entries of ``data`` as a JSON object to ``path_dir/filename``.

    Args:
        path_dir: target directory; created (including parents) when missing.
        filename: name of the JSON file inside ``path_dir``.
        data: dict whose values are lists, objects exposing ``tolist()``
            (e.g. numpy arrays), or other JSON-serialisable values.
        append: when True and the file already exists, its current content is
            loaded and updated with ``data`` (same keys are overwritten);
            otherwise the file is replaced by ``data`` alone.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(path_dir, exist_ok=True)
    full_filename = os.path.join(path_dir, filename)

    value_dict = dict()
    if append and os.path.exists(full_filename):
        with open(full_filename, "r") as file:
            value_dict = json.load(file)

    for key in data:
        value = data[key]
        # Only call tolist() on objects that actually provide it, so plain
        # scalars and tuples no longer raise AttributeError.
        value_dict[key] = value.tolist() if hasattr(value, "tolist") else value

    # write (the context manager closes the file)
    with open(full_filename, "w") as file:
        json.dump(value_dict, file)


def read_array_from_json(path_dir, filename):
    """Load and return the JSON content of ``path_dir/filename``.

    Returns:
        The decoded JSON value, or ``None`` when the file does not exist.
    """
    full_filename = os.path.join(path_dir, filename)
    if not os.path.exists(full_filename):
        return None
    # The context manager closes the file even if json.load raises.
    with open(full_filename, "r") as file:
        return json.load(file)
def read_array_from_json_file(filename):
    """Load and return the JSON content of the file at ``filename``.

    Unlike ``read_array_from_json`` there is no existence check: ``open``
    raises if the file is missing.
    """
    # The context manager closes the file even if json.load raises.
    with open(filename, "r") as file:
        return json.load(file)


def read_array_from_json_file2(filename):
    """Alias of ``read_array_from_json_file``, kept for existing callers."""
    return read_array_from_json_file(filename)

def read_dict_from_json(path_dir, filename):
    """Load and return the JSON content of ``path_dir/filename``.

    Returns:
        The decoded JSON value, or ``None`` when the file does not exist.
    """
    full_filename = os.path.join(path_dir, filename)
    if not os.path.exists(full_filename):
        return None
    # The context manager closes the file even if json.load raises.
    with open(full_filename, "r") as file:
        return json.load(file)


In [14]:
# Load a saved weight vector from a JSON file in the working directory and
# build the greedy validation player that uses OUR_TEAM in the gen9ou format.
# NOTE(review): the file name is hardcoded and must exist relative to the
# notebook's working directory, otherwise open() raises.
w1 = np.array(read_array_from_json_file("W_2024-06-30_10000_0.0001_0.01_0.75_53.24.json"))
validation_player = ValidationPlayer(battle_format="gen9ou", team=OUR_TEAM, w=w1)

In [16]:
# Challenge the account "DHgamer" once. Top-level `await` only works in an
# async-aware shell such as a notebook/IPython kernel.
# NOTE(review): presumably this waits until the challenge has been played — confirm against poke_env.
await validation_player.send_challenges("DHgamer", n_challenges=1)