In [1]:
%load_ext autoreload
%autoreload 2

import torch
torch.set_default_device('cuda') 

import os.path
from pathlib import Path
import uuid

sess_id = str(uuid.uuid4())[:8]
sess_path = Path(f'session_{sess_id}')
sess_path.mkdir(exist_ok=True)

states_path = Path('states')
states_path.mkdir(exist_ok=True)

In [35]:
from emulator import Emulator, SIMPLE_ACTION_SPACE, MOVEMENT_ACTION_SPACE
from pokered_vecenv import PokeRedVecEnv

from explore_low_agent import ExploreLowAgent
from basic_flee_agent import BasicFleeAgent

from policy import Policy

gb_path = './PokemonRed.gb'
init_state = './has_pokedex_nballs.state'
#init_state = 'states/11_5_0-0.state'

emulators = [Emulator(sess_path, gb_path, instance_id=f'main_{i}', headless=True) for i in range(10)]
vec_env = PokeRedVecEnv(emulators, [init_state])

flee_policy = Policy([], SIMPLE_ACTION_SPACE)
flee_agent = BasicFleeAgent('flee_agent', SIMPLE_ACTION_SPACE)

explore_policy = Policy([], MOVEMENT_ACTION_SPACE)
explore_agent = ExploreLowAgent('explore_agent', MOVEMENT_ACTION_SPACE, 12)

vec_env.register_agent(flee_agent)
vec_env.register_agent(explore_agent)
vec_env.initial_agent('explore_agent')

policies = {'explore_agent': explore_policy, 'flee_agent': flee_policy}

In [3]:
from ppo import PPOSettings, PPOTrainer
pposettings = PPOSettings()
ppo = PPOTrainer(policies['explore_agent'], pposettings)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from training import train

#temp variables
num_envs = 10
num_steps = 400
num_iterations = 100

train(policies, vec_env, num_envs, 'explore_agent', ppo, num_steps=num_steps, num_iterations=num_iterations, verbose=True)

Rollout - explore_agent - Reward = 0.3100000000000001, Success = False
Rollout - explore_agent - Reward = 0.22000000000000006, Success = False
Rollout - explore_agent - Reward = 0.24800000000000008, Success = False
Rollout - explore_agent - Reward = 0.26000000000000006, Success = False
Rollout - explore_agent - Reward = 0.2800000000000001, Success = False
Rollout - explore_agent - Reward = 0.4380000000000002, Success = False
Rollout - explore_agent - Reward = 0.20000000000000004, Success = False
Rollout - explore_agent - Reward = 0.3100000000000001, Success = False
Rollout - explore_agent - Reward = 0.5880000000000003, Success = False
Rollout - explore_agent - Reward = 0.2780000000000001, Success = False
Time spent stepping: 13.895978689193726
Success Rate: 0.0%
Repeating without training!
value_loss: 0.011207297444343567
policy_loss: 0.008226786740124226
entropy: 1.2652487754821777
old_approx_kl: 0.03174450248479843
approx_kl: 0.02304500713944435
Rollout - explore_agent - Reward = 0.2

KeyboardInterrupt: 

In [6]:
import json

tempd = {'what': 'to'}
temp = {'hi': 5, 'yo': 4, 'tempd': tempd}
json.dumps(temp)

'{"hi": 5, "yo": 4, "tempd": {"what": "to"}}'

In [36]:
emu = Emulator(sess_path, gb_path, instance_id='main', headless=True)
emu.reset(init_state)

In [66]:
from ram_parsing import read_string, bytes_to_string


POKE1_TRAINER_NAME_ADDR = range(0xD273, 0xD27E)

read_string(emu.pyboy, POKE1_TRAINER_NAME_ADDR)

'RED'

In [58]:
bytes_to_string(bys)

'RED'

In [44]:
text_table.text_table

{79: ' ',
 87: '#',
 81: '*',
 82: 'A1',
 83: 'A2',
 84: 'POKé',
 85: '+',
 88: '$',
 117: '…',
 127: ' ',
 128: 'A',
 129: 'B',
 130: 'C',
 131: 'D',
 132: 'E',
 133: 'F',
 134: 'G',
 135: 'H',
 136: 'I',
 137: 'J',
 138: 'K',
 139: 'L',
 140: 'M',
 141: 'N',
 142: 'O',
 143: 'P',
 144: 'Q',
 145: 'R',
 146: 'S',
 147: 'T',
 148: 'U',
 149: 'V',
 150: 'W',
 151: 'X',
 152: 'Y',
 153: 'Z',
 154: '(',
 155: ')',
 156: ':',
 157: ';',
 158: '[',
 159: ']',
 160: 'a',
 161: 'b',
 162: 'c',
 163: 'd',
 164: 'e',
 165: 'f',
 166: 'g',
 167: 'h',
 168: 'i',
 169: 'j',
 170: 'k',
 171: 'l',
 172: 'm',
 173: 'n',
 174: 'o',
 175: 'p',
 176: 'q',
 177: 'r',
 178: 's',
 179: 't',
 180: 'u',
 181: 'v',
 182: 'w',
 183: 'x',
 184: 'y',
 185: 'z',
 186: 'é',
 187: "'d",
 188: "'l",
 189: "'s",
 190: "'t",
 191: "'v",
 224: "'",
 225: 'PK',
 226: 'MN',
 227: '-',
 228: "'r",
 229: "'m",
 230: '?',
 231: '!',
 232: '.',
 237: '→',
 238: '↓',
 239: '♂',
 240: '¥',
 241: '×',
 243: '/',
 244: ',',
 245

In [52]:
bys

[145, 132, 131, 80, 128, 146, 135, 80, 137, 128, 130]

In [54]:
hex(bys[0])

'0x91'

In [56]:
emu = Emulator(sess_path, gb_path, instance_id='main', headless=False)
emu.reset(init_state)
while True:
    emu.run(7)

7664861  pyboy.pyboy                    ERROR    State file not found: ./PokemonRed.gb.state
7665041  pyboy.pyboy                    ERROR    State file not found: ./PokemonRed.gb.state


KeyboardInterrupt: 

In [63]:
from ram_parsing import PARTY_ADDR, read_uint
party = [emu.pyboy.get_memory_value(addr) for addr in PARTY_ADDR]

In [64]:
party

[177, 255, 0, 0, 0, 0]

In [65]:
emu.pyboy.get_memory_value(0xD16B)

177

In [77]:
from ram_parsing import read_uint
from ram_map import PARTY_POKEMON_ADDR_LIST, POKEMON_CURRENT_HP_OFFSET_RANGE

read_uint(emu.pyboy, POKEMON_CURRENT_HP_OFFSET_RANGE, PARTY_POKEMON_ADDR_LIST[0])

22

In [4]:
from ram_parsing import parse_pokemon
import ram_map
from ram_map import PARTY_POKEMON_ADDR_LIST
import importlib
importlib.reload(ram_map)

poke1 = parse_pokemon(emu.pyboy, PARTY_POKEMON_ADDR_LIST[0])
poke1

{'Pokemon ID': 22,
 'Status': 0,
 'Type 1': 21,
 'Type 2': 21,
 'Catch Rate': 45,
 'Move 1': 33,
 'Move 2': 39,
 'Move 3': 0,
 'Move 4': 0,
 'Trainer ID': 25110,
 'Experience': 202,
 'HP EV': 45,
 'Attack EV': 49,
 'Defense EV': 49,
 'Speed EV': 45,
 'Special EV': 65,
 'Attack/Defense IV': 135,
 'Speed/Special IV': 146,
 'PP Move 1': 35,
 'PP Move 2': 30,
 'PP Move 3': 0,
 'PP Move 4': 0,
 'Level': 6,
 'Max HP': 22,
 'Attack': 11,
 'Defense': 13,
 'Speed': 11,
 'Special': 11}

In [86]:
POKEMON_EXP_OFFSET_RANGE = range(0xE, 0x11)
from ram_parsing import read_uint

read_uint(emu.pyboy, POKEMON_EXP_OFFSET_RANGE, PARTY_POKEMON_ADDR_LIST[0])


202

In [27]:
from ram_parsing import parse_party
from ram_map import PLAYER_PARTY_ADDR

party = parse_party(emu.pyboy, 0xDA80, 20)
party

{'Count': 0}

In [8]:
party['Pokemon 1']

{'Pokemon ID': 177,
 'Current HP': 22,
 'Status': 0,
 'Type 1': 21,
 'Type 2': 21,
 'Catch Rate': 45,
 'Move 1': 33,
 'Move 2': 39,
 'Move 3': 0,
 'Move 4': 0,
 'Trainer ID': 25110,
 'Experience': 202,
 'HP EV': 45,
 'Attack EV': 49,
 'Defense EV': 49,
 'Speed EV': 45,
 'Special EV': 65,
 'Attack/Defense IV': 135,
 'Speed/Special IV': 146,
 'PP Move 1': 35,
 'PP Move 2': 30,
 'PP Move 3': 0,
 'PP Move 4': 0,
 'Level': 6,
 'Max HP': 22,
 'Attack': 11,
 'Defense': 13,
 'Speed': 11,
 'Special': 11,
 'Pokemon Name': 'AAAAAAAAAA',
 'Trainer Name': 'RED'}

In [21]:
from ram_parsing import parse_storage

inv = parse_storage(emu.pyboy)
inv

{'Total Items': 1, 'Items': [('Potion', 1)]}

In [13]:
emu.pyboy.get_memory_value(0xD322)

255

In [20]:
from ram_parsing import parse_money
#975
parse_money(emu.pyboy)

975

In [18]:
emu.pyboy.get_memory_value(0xD349)

117

In [17]:
9 * 256

2304

In [31]:
from map_table import map_table

map_table

{0: 'Pallet Town',
 1: 'Viridian City',
 2: 'Pewter City',
 3: 'Cerulean City',
 4: 'Lavender Town',
 5: 'Vermilion City',
 6: 'Celadon City',
 7: 'Fuchsia City',
 8: 'Cinnabar Island',
 9: 'Pokémon League',
 10: 'Saffron City',
 11: 'Unused Fly location',
 12: 'Route 1',
 13: 'Route 2',
 14: 'Route 3',
 15: 'Route 4',
 16: 'Route 5',
 17: 'Route 6',
 18: 'Route 7',
 19: 'Route 8',
 20: 'Route 9',
 21: 'Route 10',
 22: 'Route 11',
 23: 'Route 12',
 24: 'Route 13',
 25: 'Route 14',
 26: 'Route 15',
 27: 'Route 16',
 28: 'Route 17',
 29: 'Route 18',
 30: 'Sea Route 19',
 31: 'Sea Route 20',
 32: 'Sea Route 21',
 33: 'Route 22',
 34: 'Route 23',
 35: 'Route 24',
 36: 'Route 25',
 37: "Red's house (first floor)",
 38: "Red's house (second floor)",
 39: "Blue's house",
 40: "Professor Oak's Lab",
 41: 'Pokémon Center (Viridian City)',
 42: 'Poké Mart (Viridian City)',
 43: 'School (Viridian City)',
 44: 'House 1 (Viridian City)',
 45: 'Gym (Viridian City)',
 46: "Diglett's Cave (Route 2 ent

In [37]:
from save_state import save_state

save_state(emu)

In [33]:
emu.run(7)