In [None]:
import showdown
import agent
import asyncio
import torch
import json
from datetime import datetime
from IPython.display import clear_output
from matplotlib import pyplot as plt

In [None]:

async def agent_battle(agent, showdown):
    """Play one battle to completion and store its transitions in the agent.

    The showdown client is restarted, then the agent repeatedly chooses an
    action from the current state and valid-action list until
    ``executeAction`` reports the battle as done. Afterwards a terminal bonus
    (+5 when ``winner`` is truthy, otherwise -5) is divided evenly over all
    recorded steps, added to each step's own reward, and the adjusted
    transition is passed to ``agent.remember``.

    Args:
        agent: Object exposing ``act(state, validActions)`` and
            ``remember(state, action, reward, nextState, done)``.
        showdown: Object exposing awaitable ``restart()`` and
            ``executeAction(action)``, plus ``getState()`` and
            ``getValidActions()``.

    Returns:
        ``(winner, totalReward)``: the winner value from the last
        ``executeAction`` call and the sum of all adjusted step rewards.
    """
    await showdown.restart()

    transitions = []
    finished = False
    while not finished:
        state = showdown.getState()
        action = agent.act(state, showdown.getValidActions())
        outcome = await showdown.executeAction(action)
        nextState, reward, finished, winner = outcome
        transitions.append((state, action, reward, nextState, finished))

    # Spread the win/loss bonus uniformly over every step of the battle.
    battleReward = 5 if winner else -5
    perStepBonus = battleReward / len(transitions)

    totalReward = 0
    for stepState, stepAction, stepReward, stepNext, stepDone in transitions:
        adjustedReward = stepReward + perStepBonus
        totalReward += adjustedReward
        agent.remember(stepState, stepAction, adjustedReward, stepNext, stepDone)

    print("Finishing battle...")
    return winner, totalReward

In [None]:
def makePlot(x, y, battle, timestamp):
    """Draw the learning curve, save it as a PNG, and display it.

    Args:
        x: Values for the horizontal axis (labelled 'Battles').
        y: Values for the vertical axis (labelled 'Rewards'), one per ``x``.
        battle: Battle number embedded in the output file name.
        timestamp: Timestamp string embedded in the output file name.

    The image is written to
    ``data/logs/plots/plot-{battle}-{timestamp}.png``; the directory is not
    created here.
    """
    # Use a dedicated figure per call so curves from earlier calls cannot
    # accumulate on pyplot's implicit "current" axes.
    fig, ax = plt.subplots()
    ax.plot(x, y)
    ax.set_xlabel('Battles')
    ax.set_ylabel('Rewards')
    ax.set_title('Learning Curve')
    fig.savefig(f"data/logs/plots/plot-{battle}-{timestamp}.png")
    plt.show()
    # Release the figure explicitly; not every backend destroys it in show().
    plt.close(fig)

In [None]:
async def training_loop(agent1, agent2, showdown1, showdown2, numBattles=5000):
    """Run ``numBattles`` rounds of concurrent battles and train ``agent1``.

    Each round runs ``agent_battle`` for both agents concurrently, records
    agent 1's result, and calls ``agent1.replay()``. Every 10th round
    (excluding round 0) the notebook output is cleared and a stats summary is
    written to ``data/logs/outputs/`` and printed. Every 50th round
    (excluding round 0) epsilon is raised to at least 0.3 when the win ratio
    lies between 40% and 60%, the model, memory, learning-curve plot and a
    stats JSON are saved, and agent 2's weights are synced to agent 1's.

    Args:
        agent1: Agent being trained; must expose ``model``, ``epsilon``,
            ``replay()``, ``saveModel(path)`` and ``saveMemory(path)``.
        agent2: Second agent; only its ``model`` is touched here (weights are
            copied from ``agent1``).
        showdown1: Showdown client passed to ``agent_battle`` for ``agent1``.
        showdown2: Showdown client passed to ``agent_battle`` for ``agent2``.
        numBattles: Number of rounds to run.
    """
    agent1Wins = 0
    rewards1 = 0
    plotX = []
    plotY = []

    # Start both agents from identical weights.
    agent2.model.load_state_dict(agent1.model.state_dict())

    for battle in range(numBattles):

        # Concurrently execute both agents and get the results from agent_battle
        results = await asyncio.gather(
            agent_battle(agent1, showdown1),
            agent_battle(agent2, showdown2),
        )
        winner, battleReward = results[0]

        if winner:
            agent1Wins += 1
        # The reward is tracked the same way whether agent 1 won or lost.
        rewards1 += battleReward
        plotX.append(battle)
        plotY.append(battleReward)

        # NOTE(review): replay() was already invoked twice per battle before
        # this cleanup; kept unchanged - confirm two passes are intended.
        agent1.replay()
        agent1.replay()

        logDue = battle > 0 and battle % 10 == 0
        checkpointDue = battle > 0 and battle % 50 == 0

        if logDue or checkpointDue:
            # One timestamp shared by the log file and the plot of this round,
            # so the checkpoint branch does not depend on the logging branch.
            timestamp = datetime.now().strftime("%Y_%m%d-%p%I_%M_%S")

        # Every 10 battles, output the current state and clear the old output.
        # Notebooks are so laggy.
        if logDue:
            clear_output(wait=True)

            # Save output to file
            with open(f"data/logs/outputs/output-{battle}-{timestamp}.txt", "w") as file:
                file.write(f"Current Stats: \n Wins This Cycle: {agent1Wins} \n Battles: {battle} \n Epsilon: {agent1.epsilon}")

            print(f"Cleared Output! Current Stats: \n Wins This Cycle: {agent1Wins} \n  Battles: {battle} \n Epsilon: {agent1.epsilon}")

        # Every 50 battles, save the model and memory.
        if checkpointDue:

            # `battle` is a 0-based index, so battle + 1 battles have finished.
            winRatio = agent1Wins / (battle + 1)

            # Reset epsilon according to win ratio: when roughly even with the
            # opponent, make sure exploration is at least 0.3.
            if 0.40 <= winRatio <= 0.60:
                agent1.epsilon = max(agent1.epsilon, 0.3)

            # Save model and memory
            agent1.saveModel(f"data/models/model_{battle}.pt")
            agent1.saveMemory(f"data/memory/memory_{battle}.json")

            # Save plot
            makePlot(plotX, plotY, battle, timestamp)

            # Set agent 2's weights to agent 1's.
            agent2.model.load_state_dict(agent1.model.state_dict())

            with open(f"data/stats/{battle}.json", "w") as statsFile:
                statsFile.write(json.dumps({"wins": agent1Wins, "rewards": rewards1}))

In [None]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
stateSize = 671

possibleActions = json.load(open("data/possible_actions.json", "r"))
actionSize = len(possibleActions)

agent1 = agent.Agent(stateSize, actionSize, device, possibleActions)
agent2 = agent.Agent(stateSize, actionSize, device, possibleActions)
#newAgent.loadModel("data/models/model_450.pt")
#newAgent.loadMemory("data/memory/memory_450.json")

sd1 = showdown.Showdown("https://play.pokemonshowdown.com/action.php", "PoryAI-1", "password", "ws://localhost:8000/showdown/websocket", "gen9randombattle", True)
sd2 = showdown.Showdown("https://play.pokemonshowdown.com/action.php", "PoryAI-2", "password", "ws://localhost:8000/showdown/websocket", "gen9randombattle", False)
await sd1.connectNoSecurity()
await sd2.connectNoSecurity()
await training_loop(agent1, agent2, sd1, sd2, 10000)