In [None]:
import numpy as np
from progress import progress
from judger import Judger
from players import AIPlayer, RandomPlayer, HumanPlayer, DLPlayer

import matplotlib.pyplot as plt

In [None]:
def _plot_eval_history ( ax, p1_rates, p2_rates, draw_rates, p1_label, p2_label, title, xlabel ):
    """Draw one evaluation panel: P1-win, P2-win and draw rates per checkpoint."""
    ax.plot( p1_rates, label = p1_label )
    ax.plot( p2_rates, label = p2_label )
    ax.plot( draw_rates, label = 'Draws' )

    ax.legend()
    ax.set_title( title )
    ax.set_xlabel( xlabel )
    ax.set_ylabel( 'Fraction of games' )
    # Keep only the labels that sit on the outer edge of the subplot grid.
    ax.label_outer()


def train_compete ( P1, P2, epochs = 500, eval_every = 100, eval_games = 50 ):
    """Train P1 against P2 and periodically benchmark against a random player.

    Plays `epochs` learning games through a `Judger`, reporting running
    win/draw rates via `progress`. Every `eval_every` games (starting with the
    very first one) both policies are saved and two evaluation matches are run
    with `compete`: DLPlayer vs RandomPlayer and RandomPlayer vs DLPlayer.
    After training, the policies are saved once more and the collected
    evaluation rates are plotted in a two-panel figure.

    Parameters
    ----------
    P1, P2 : player objects
        Must provide `savePolicy()`; they are handed to `Judger` as first and
        second player.
    epochs : int
        Number of training games.
    eval_every : int
        Run the evaluation matches whenever `i % eval_every == 0` for the
        0-based game index `i`. Must be >= 1.
    eval_games : int
        Number of games per evaluation match. Must be >= 1.

    Raises
    ------
    ValueError
        If `eval_every` or `eval_games` is smaller than 1.
    """
    if eval_every < 1:
        raise ValueError( 'eval_every must be >= 1, got %r' % (eval_every,) )
    if eval_games < 1:
        raise ValueError( 'eval_games must be >= 1, got %r' % (eval_games,) )

    # Evaluation history, one entry per checkpoint: AI moves first ...
    P1W_AIRd, P2W_AIRd, Drw_AIRd = [], [], []
    # ... and AI moves second.
    P1W_RdAI, P2W_RdAI, Drw_RdAI = [], [], []

    judger = Judger( P1, P2, learning = True )

    P1Win = 0.0
    P2Win = 0.0
    Draws = 0.0

    # Zero-pad the game counter to the number of digits in `epochs`.
    width = len( str( epochs ) )

    for i in range(epochs):

        winner = judger.play( show = False )

        if winner == 1:
            P1Win += 1
        elif winner == -1:
            P2Win += 1
        elif winner == 0:
            Draws += 1

        judger.reset()

        played = i + 1
        progress( count = played, total = epochs,
                  status1 = 'Game %s/%s' % ( str(played).zfill(width), epochs ),
                  status2 = 'P1Wins: %.2f, P2Wins: %.2f, Draws: %.2f' % (
                      P1Win/played, P2Win/played, Draws/played ) )

        if i % eval_every == 0:
            # Persist the current policies before the evaluation players are built.
            P1.savePolicy()
            P2.savePolicy()

            # NOTE(review): evaluation always instantiates a fresh DLPlayer(),
            # whatever the type of P1/P2; presumably it picks up the policy just
            # saved above -- confirm this is intended when training AIPlayer.
            P1W_AIRd, P2W_AIRd, Drw_AIRd = compete( DLPlayer(), RandomPlayer(),
                                                    P1W_AIRd, P2W_AIRd, Drw_AIRd,
                                                    games = eval_games )
            P1W_RdAI, P2W_RdAI, Drw_RdAI = compete( RandomPlayer(), DLPlayer(),
                                                    P1W_RdAI, P2W_RdAI, Drw_RdAI,
                                                    games = eval_games )

    P1.savePolicy()
    P2.savePolicy()

    fig, (pltAIRd, pltRdAI) = plt.subplots(2,1)

    checkpoint_label = 'Evaluation checkpoint (every %s training games)' % (eval_every,)

    _plot_eval_history( pltAIRd, P1W_AIRd, P2W_AIRd, Drw_AIRd,
                        'P1=AI', 'P2=Random', 'AI vs Random', checkpoint_label )
    _plot_eval_history( pltRdAI, P1W_RdAI, P2W_RdAI, Drw_RdAI,
                        'P1=Random', 'P2=AI', 'Random vs AI', checkpoint_label )

    plt.show()

In [None]:
def compete ( P1, P2, P1W, P2W, Dr, games = 500 ):
    """Play `games` non-learning games between P1 and P2 and record the rates.

    Both players load their stored policy first. The fraction of games won by
    P1, won by P2 and drawn is appended to `P1W`, `P2W` and `Dr` respectively
    (the lists are modified in place) and the three lists are returned.
    """
    referee = Judger( P1, P2, learning = False )

    for contestant in ( P1, P2 ):
        contestant.loadPolicy()

    # Outcome codes returned by the judger: 1 -> P1 wins, -1 -> P2 wins, 0 -> draw.
    outcome_codes = ( (1, 'p1'), (-1, 'p2'), (0, 'draw') )
    tally = { 'p1': 0.0, 'p2': 0.0, 'draw': 0.0 }

    for _ in range(games):
        result = referee.play( show = False )

        for code, key in outcome_codes:
            if result == code:
                tally[key] += 1

        referee.reset()

    for history, key in ( (P1W, 'p1'), (P2W, 'p2'), (Dr, 'draw') ):
        history.append( tally[key] / games )

    return P1W, P2W, Dr

In [None]:
# Self-play run: two AIPlayer instances (exp = 0.1 each) learn against each
# other for 1000 games, with periodic evaluation and a final plot.
train_compete(
    P1 = AIPlayer( exp = 0.1 ),
    P2 = AIPlayer( exp = 0.1 ),
    epochs = 1000,
)

In [None]:
# AI vs Random, learning
# Uses train_compete: no `train` function is defined or imported in this
# notebook, so the previous call failed with NameError on a fresh kernel.
train_compete( AIPlayer( exp = 0.1 ), RandomPlayer(), epochs = 1000 )

In [None]:
# Random vs AI, learning
# Uses train_compete: no `train` function is defined or imported in this
# notebook, so the previous call failed with NameError on a fresh kernel.
train_compete( RandomPlayer(), AIPlayer( exp = 0.1 ), epochs = 10000 )

In [None]:
# Self-play run: two DLPlayer instances (exp = 0.1 each) learn against each
# other for 5000 games, with periodic evaluation and a final plot.
train_compete(
    P1 = DLPlayer( exp = 0.1 ),
    P2 = DLPlayer( exp = 0.1 ),
    epochs = 5000,
)