In [None]:
import sys
sys.path.append('../')
import datetime

import torch as T

from board import Connect4Board
from board2dqn import createStateTensor
from agent import Connect4Agent, calculateReward
from validation import validate
from dqn import exportOnnx

def log(message):
    """Print ``message`` to stdout, prefixed with the current local time as ``[HH:MM:SS]``."""
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] {message}")

In [None]:
#
# Hyper parameters
#
# Every value below is forwarded to the Connect4Agent constructor in the
# following cell; the grouping mirrors the keyword each one is passed under.

# Passed as `lr` and `gamma`.
lr = 0.1
gamma = 0.9

# Passed as `epsilon`, `epsilon_end` and `epsilon_decay`.
epsilon = 0.5
eps_min = 0.2
eps_dec = 1e-6

# Passed as `memory_size`, `batch_size` and `batch_count`.
memory_size = 128_000
batch_size = 256
batch_count = 4

# Passed as `targetUpdateInterval`.
target_update_interval = 16

In [None]:
# Build the training agent from the hyper parameters defined in the cell above.
# All arguments are passed by keyword, so their order here is purely cosmetic.
agent = Connect4Agent(
    lr=lr,
    gamma=gamma,
    epsilon=epsilon,
    epsilon_end=eps_min,
    epsilon_decay=eps_dec,
    memory_size=memory_size,
    batch_size=batch_size,
    batch_count=batch_count,
    targetUpdateInterval=target_update_interval,
)

In [None]:
# Restore the agent from the checkpoint named 'connect4'.
# (Plain string literal: there is nothing to interpolate, so no f-prefix is needed.)
agent.loadCheckpoint('connect4')

In [None]:
#
# TRAINING
#
# Self-play loop: each iteration plays one complete game, storing every move as
# a transition in the agent, then calls agent.learn() once. Progress is logged
# every `log_interval` games; every `validation_interval` games a checkpoint is
# written and the evaluation model is validated.
#
gamesToGo = 500000

log_interval = 5000

validation_interval = 20000
validation_gamesPerPlayer = 1000
validation_procsPerPlayer = 8
validation_strength = 100

lastLoggedGame = 0
games = set()     # distinct game keys seen since the last log line
allGames = set()  # distinct game keys seen since this cell started

log(f"Starting training for {gamesToGo} games.")

for game in range(1, gamesToGo+1):
    env = Connect4Board()

    next_state = createStateTensor(env)

    while not env.Finished:
        state = next_state
        validMoves = [a for a in range(7) if env.is_valid(a)]
        action = agent.getTrainingAction(state, validMoves)
        env.move(action)
        next_state = createStateTensor(env)
        # The mask is built from `validMoves`, i.e. the moves that were legal in
        # `state` (before env.move), not the ones legal in `next_state`.
        # NOTE(review): if store_transition expects the next-state mask, this is
        # wrong whenever the move fills a column -- confirm against agent.py.
        validMovesMask = T.zeros(7, dtype=bool)
        validMovesMask[validMoves] = True
        reward = calculateReward(env)
        agent.store_transition(state, action, next_state, validMovesMask, env.Finished, reward)

    games.add(env.gameKey)
    allGames.add(env.gameKey)

    # One learning call per finished game.
    agent.learn()

    if game % log_interval == 0:
        # "div" = percentage of distinct games, first for the window since the
        # last log line, then for the whole run. `game` is 1-based, so exactly
        # `game` games have been played in total and `game - lastLoggedGame`
        # since the previous log line.
        log(f'{game} games, div: {100*len(games)/(game-lastLoggedGame):.2f} / {100*len(allGames)/game:.2f}')
        games.clear()
        lastLoggedGame = game
        agent.printStats()
    if game % validation_interval == 0:
        # Checkpoint first, so the saved state matches the model that is validated.
        agent.saveCheckpoint(f'connect4-{game}')
        log('Validation:')
        validate(agent.evaluationModel, validation_gamesPerPlayer, validation_procsPerPlayer, validation_strength)

In [None]:
# Stand-alone validation of the current evaluation model. The positional values
# are the same ones the training cell passes via its validation_* settings.
validate(
    agent.evaluationModel,
    1000,  # games per player (cf. validation_gamesPerPlayer)
    8,     # processes per player (cf. validation_procsPerPlayer)
    100,   # strength (cf. validation_strength)
)

In [None]:
# Export the agent's evaluation model via dqn.exportOnnx.
# NOTE(review): 'connect4' is presumably the output name/path stem -- confirm in dqn.py.
exportOnnx(
    agent.evaluationModel,
    'connect4',
)