In [1]:
import sys
sys.path.append('../')
import datetime
import numpy as np
from board import Connect4Board
from agent import Connect4Agent, createStateTensor, calculateReward
from validation import validate
from dqn import exportOnnx

def log(message):
    """Print `message` to stdout, prefixed with the current local time as [HH:MM:SS]."""
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] {message}")

In [2]:
# ---------------------------------------------------------------------------
# Hyper parameters (passed to the Connect4Agent constructor in the next cell)
# ---------------------------------------------------------------------------

# Optimisation settings: learning rate, gamma (presumably the discount
# factor -- confirm against Connect4Agent) and the batch size.
lr = 0.09
gamma = 0.9
batch_size = 256

# Exploration settings, passed as epsilon / epsilon_end / epsilon_decay.
# NOTE(review): epsilon starts at the same value as eps_min, so eps_dec
# presumably has nothing left to decay -- confirm this is intended.
epsilon, eps_min = 0.1, 0.1
eps_dec = 1e-7

# Presumably the capacity of the transition memory -- TODO confirm.
memory_size = 128_000

# Passed as targetUpdateInterval; the unit (games vs. learn steps) is not
# visible here -- TODO confirm against Connect4Agent.
target_update_interval = 32

In [3]:
# Instantiate the agent from the hyper parameters defined in the previous cell.
agent = Connect4Agent(
    # optimisation
    lr=lr,
    gamma=gamma,
    batch_size=batch_size,
    # exploration
    epsilon=epsilon,
    epsilon_end=eps_min,
    epsilon_decay=eps_dec,
    # memory / target network
    memory_size=memory_size,
    targetUpdateInterval=target_update_interval,
)

Agent created, network has 255479 parameters and runs on cpu.


In [4]:
# Load the agent from the checkpoint named 'connect4'.
agent.loadCheckpoint('connect4')

Loaded checkpoint connect4.


In [None]:
#
# TRAINING
#
# Plays `gamesToGo` games in which the agent picks every move, stores each
# transition in the agent, and calls agent.learn() once per finished game.
# Every `log_interval` games a diversity line is logged; every
# `validation_interval` games a checkpoint is saved and validate() is run.
#
gamesToGo = 500000

log_interval = 5000

validation_interval = 5000
validation_games = 1000
omega = 1  # forwarded unchanged to validate(); meaning not visible here

# Index of the most recently logged game. -1 means "nothing logged yet", so
# `game - lastLoggedGame` is always the exact number of games played since
# the previous log line (1 for the very first log at game 0).
lastLoggedGame = -1
games = set()     # distinct game keys seen since the previous log line
allGames = set()  # distinct game keys seen during the whole run

log(f"Starting training for {gamesToGo} games.")

for game in range(gamesToGo):
    env = Connect4Board()

    next_state = createStateTensor(env)

    # Play one game to completion, storing every transition.
    while not env.Finished:
        state = next_state
        action = agent.getTrainingAction(state, env.ValidMovesMask)
        env.move(action)
        next_state = createStateTensor(env)
        reward = calculateReward(env)
        agent.store_transition(state, action, next_state, env.ValidMovesMask, env.Finished, reward)

    games.add(env.gameKey)
    allGames.add(env.gameKey)
    agent.learn()

    if game % log_interval == 0:
        # "div" = percentage of distinct games: since the last log / overall.
        log(f'{game} games, div: {100*len(games)/(game-lastLoggedGame):.2f} / {100*len(allGames)/(game+1):.2f}')
        games.clear()
        lastLoggedGame = game
        agent.printStats()
    if game % validation_interval == 0:
        # NOTE(review): games played after the last multiple of
        # validation_interval are never checkpointed by this loop.
        agent.saveCheckpoint(f'connect4-{game}')
        log('Validation:')
        validate(agent, validation_games, omega)

In [5]:
# Export the agent's evaluation model through dqn.exportOnnx.
# NOTE(review): "test" is presumably the output file name/stem -- confirm
# against exportOnnx before relying on the resulting path.
exportOnnx(agent.evaluationModel, "test")