In [1]:
import numpy as np
from progress import progress
from judger import Judger
from players import AIPlayer, HumanPlayer

In [2]:
def train(epochs=500):
    """Run self-play games between two AIPlayer instances and save their policies.

    Both players are created with ``exploreRate = 0.1`` and handed to a
    ``Judger`` built with ``learning = True``. After every game the running
    fractions of P1 wins, P2 wins and draws are reported through ``progress``.
    Once all games are played, ``savePolicy()`` is called on each player.

    Parameters
    ----------
    epochs : int, optional
        Number of games to play (default 500).

    Returns
    -------
    None
    """
    first = AIPlayer(exploreRate=0.1)
    second = AIPlayer(exploreRate=0.1)
    referee = Judger(first, second, learning=True)

    # Float tallies, so the ratios below are true divisions.
    first_wins, second_wins, ties = 0.0, 0.0, 0.0

    for played in range(1, epochs + 1):
        outcome = referee.play(show=False)

        # play() result: 1 is counted for P1, -1 for P2, 0 as a draw.
        if outcome == 1:
            first_wins += 1
        elif outcome == -1:
            second_wins += 1
        elif outcome == 0:
            ties += 1

        referee.reset()

        # Zero-pad the running game number to the digit count of `epochs`.
        pad = int(np.ceil(np.log10(epochs + 1)))
        counter = str(played).zfill(pad)
        ratios = tuple(tally / played for tally in (first_wins, second_wins, ties))

        progress(
            count=played,
            total=epochs,
            status1='Epoch %s/%s' % (counter, epochs),
            status2='P1Wins: %.2f, P2Wins: %.2f, Draws: %.2f' % ratios,
        )

    first.savePolicy()
    second.savePolicy()

In [3]:
def compete(P1, P2, turns=500):
    """Play a series of non-learning games between two players and report the score.

    A ``Judger`` is built with ``learning = False``, ``loadPolicy()`` is called
    on both players, and after every game the running fractions of P1 wins,
    P2 wins and draws are reported through ``progress``.

    Parameters
    ----------
    P1, P2 : player objects
        Must provide ``loadPolicy()``; passed to ``Judger`` in this order.
    turns : int, optional
        Number of games to play (default 500).

    Returns
    -------
    None
    """
    referee = Judger(P1, P2, learning=False)

    # NOTE(review): policies are loaded only after the Judger exists; presumably
    # the Judger sets per-player state that loadPolicy relies on -- keep this order.
    for contestant in (P1, P2):
        contestant.loadPolicy()

    # Float tallies, so the ratios below are true divisions.
    first_wins, second_wins, ties = 0.0, 0.0, 0.0

    for played in range(1, turns + 1):
        outcome = referee.play(show=False)

        # play() result: 1 is counted for P1, -1 for P2, 0 as a draw.
        if outcome == 1:
            first_wins += 1
        elif outcome == -1:
            second_wins += 1
        elif outcome == 0:
            ties += 1

        referee.reset()

        # Zero-pad the running game number to the digit count of `turns`.
        pad = int(np.ceil(np.log10(turns + 1)))
        counter = str(played).zfill(pad)
        ratios = tuple(tally / played for tally in (first_wins, second_wins, ties))

        progress(
            count=played,
            total=turns,
            status1='Turn %s/%s' % (counter, turns),
            status2='P1Wins: %.2f, P2Wins: %.2f, Draws: %.2f' % ratios,
        )

In [4]:
def play():
    """Play one displayed game with the AIPlayer as P1 and a HumanPlayer as P2.

    The AI is created with ``exploreRate = 0`` and ``loadPolicy()`` is called
    on it before the game. The result is printed from the human's point of
    view: "Win!", "Lose!" or "Tie!".
    """
    machine = AIPlayer(exploreRate=0)
    person = HumanPlayer()
    referee = Judger(machine, person, learning=False)

    # NOTE(review): loadPolicy runs after Judger construction, as in the
    # original; presumably it depends on state the Judger assigns -- confirm.
    machine.loadPolicy()

    outcome = referee.play(show=True)

    # The game result is compared against each player's `symbol` attribute.
    if outcome == person.symbol:
        verdict = "Win!"
    elif outcome == machine.symbol:
        verdict = "Lose!"
    else:
        verdict = "Tie!"

    print(verdict)

In [5]:
def play2():
    """Play one displayed game with a HumanPlayer as P1 and the AIPlayer as P2.

    Mirror of ``play`` with the seats swapped. The AI is created with
    ``exploreRate = 0`` and ``loadPolicy()`` is called on it before the game.
    The result is printed from the human's point of view: "Win!", "Lose!"
    or "Tie!".
    """
    person = HumanPlayer()
    machine = AIPlayer(exploreRate=0)
    referee = Judger(person, machine, learning=False)

    # NOTE(review): loadPolicy runs after Judger construction, as in the
    # original; presumably it depends on state the Judger assigns -- confirm.
    machine.loadPolicy()

    outcome = referee.play(show=True)

    # The game result is compared against each player's `symbol` attribute.
    if outcome == person.symbol:
        verdict = "Win!"
    elif outcome == machine.symbol:
        verdict = "Lose!"
    else:
        verdict = "Tie!"

    print(verdict)

In [6]:
# Self-play training run: 1000 games, after which train() calls savePolicy() on both players.
train(epochs=1000)

Epoch 1000/1000 [##############################] P1Wins: 0.42, P2Wins: 0.12, Draws: 0.46

In [7]:
# Evaluate two agents created with exploreRate = 0 against each other over
# 1000 games; compete() calls loadPolicy() on both before playing.
P1 = AIPlayer(exploreRate=0)
P2 = AIPlayer(exploreRate=0)
compete(P1, P2, turns=1000)

Turn 1000/1000 [##############################] P1Wins: 0.00, P2Wins: 0.00, Draws: 1.00

In [8]:
# Play one displayed game: AIPlayer moves as P1, HumanPlayer as P2 (see play()).
play()

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------

-------------
|   |   |   | 
-------------
|   | O |   | 
-------------
|   |   |   | 
-------------



KeyboardInterrupt: 