In [1]:
import showdown
import agent
import asyncio
import torch
import json
from IPython.display import clear_output

In [2]:
async def training_loop(agent, showdown, numBattles=5000):
    """Play ``numBattles`` battles and train ``agent`` on the transitions.

    Each battle restarts the environment, lets the agent act until the
    environment reports the battle as done, then stores every recorded
    transition in the agent's memory with the battle-level reward spread
    evenly across them. Every 10 battles the cell output is cleared and a
    stats summary is printed; every 50 battles the model, the memory and a
    stats JSON file are written under ``data/``.

    Args:
        agent: Object providing ``act``, ``remember``, ``replay``,
            ``saveModel``, ``saveMemory`` and an ``epsilon`` attribute.
        showdown: Object providing awaitable ``restart`` and
            ``executeAction`` plus ``getState`` and ``getValidActions``.
        numBattles: Number of battles to play.
    """
    agentWins = 0  # wins since the last 50-battle checkpoint
    agentWinsTotal = 0  # only updated at each 50-battle checkpoint
    agentLosses = 0
    cumulativeRewards = 0
    for battle in range(numBattles):
        # Battle-level reward: stays at -10 unless the agent wins (then 20).
        battleReward = -10
        battleProgress = []

        await showdown.restart()
        battleDone = False
        totalReward = 0

        while not battleDone:
            state = showdown.getState()

            validActions = showdown.getValidActions()
            action = agent.act(state, validActions)
            nextState, reward, battleDone, winner = await showdown.executeAction(action)
            print(f"Action: {action}, Reward: {reward}")

            battleProgress.append((state, action, reward, nextState, battleDone))
            agent.replay()
            totalReward += reward
            cumulativeRewards += reward

            if battleDone:
                print(f"Battle {battle} done. Total reward: {totalReward}")
                if winner == 1:
                    agentWins += 1
                    battleReward = 20

                    # Store winning battles more: add ten extra copies of the
                    # terminal transition carrying the win reward.
                    for _ in range(10):
                        battleProgress.append((nextState, action, battleReward, nextState, battleDone))
                else:
                    agentLosses += 1

        # Once the battle's done, spread the battle-level reward evenly over
        # every recorded transition. The list is never empty here because the
        # loop above always runs at least once.
        rewardShare = battleReward / len(battleProgress)
        for state, action, reward, nextState, done in battleProgress:
            agent.remember(state, action, reward + rewardShare, nextState, done)

        agent.replay()

        if battle % 10 == 0 and battle > 0:
            clear_output(wait=True)
            print(f"Cleared Output! Current Stats: \n Wins This Cycle: {agentWins} \n Total Wins: {agentWinsTotal } \n Losses: {agentLosses} \n Cumulative Rewards: {cumulativeRewards} \n Battles: {battle} \n Epsilon: {agent.epsilon}")
        if battle % 50 == 0 and battle > 0:
            agentWinsTotal += agentWins

            if agentWins / 50 < 0.2:
                # Win rate is low: make sure epsilon is at least 0.3.
                agent.epsilon = max(agent.epsilon, 0.3)

            # Save model, memory and stats. The stats must be written BEFORE
            # the per-cycle win counter is reset, otherwise "wins" is always 0.
            agent.saveModel(f"data/models/model_{battle}.pt")
            agent.saveMemory(f"data/memory/memory_{battle}.json")
            with open(f"data/stats/{battle}.json", "w") as statsFile:
                statsFile.write(json.dumps({"wins": agentWins, "losses": agentLosses, "rewards": cumulativeRewards}))

            agentWins = 0

In [None]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
stateSize = 671

possibleActions = json.load(open("data/possible_actions.json", "r"))
actionSize = len(possibleActions)

newAgent = agent.Agent(stateSize, actionSize, device, possibleActions)
#newAgent.loadModel("data/models/model_450.pt")
#newAgent.loadMemory("data/memory/memory_450.json")

showdown = showdown.Showdown("https://play.pokemonshowdown.com/action.php", "PoryAI-0", "password", "ws://localhost:8000/showdown/websocket", "gen9randombattle")

await showdown.connectNoSecurity()
await training_loop(newAgent, showdown, 10000)

PoryAI-0 looking for a battle...
New Battle tag: battle-gen9randombattle-23426
>battle-gen9randombattle-23426
|request|
>battle-gen9randombattle-23426
|j|☆FoulPlay-0
|pm| FoulPlay-0| PoryAI-0|/nonotify FoulPlay-0 accepted the challenge, starting &laquo;<a href="/battle-gen9randombattle-23426">battle-gen9randombattle-23426</a>&raquo;
>battle-gen9randombattle-23426
|t:|1742858993
|gametype|singles
>battle-gen9randombattle-23426
|player|p1|PoryAI-0|101|
>battle-gen9randombattle-23426
|request|{"active":[{"moves":[{"move":"Leaf Storm","id":"leafstorm","pp":8,"maxpp":8,"target":"normal","disabled":false},{"move":"Glare","id":"glare","pp":48,"maxpp":48,"target":"normal","disabled":false},{"move":"Synthesis","id":"synthesis","pp":8,"maxpp":8,"target":"self","disabled":false},{"move":"Dragon Pulse","id":"dragonpulse","pp":16,"maxpp":16,"target":"any","disabled":false}],"canTerastallize":"Water"}],"side":{"name":"PoryAI-0","id":"p1","pokemon":[{"ident":"p1: Serperior","details":"Serperior, L79,