In [7]:
import numpy as np
import math
from simplified_connectx import *
%load_ext autoreload
%autoreload 2

##### In MCTS we store 1 tree

##### MCTS variant: instead of setting UCB_score to infinity, which forces the
##### algorithm to explore the whole space, we can "guide" it with NN predictions,
##### for example p*(math.sqrt(self.total_counts))/(self.SA_counts[(s,action)]),
##### where p is the probability of picking that action according to the NN.

In [8]:
import logging

import coloredlogs

from Coach import Coach
from Game import Game
%autoreload 2

In [13]:
# Settings handed to Coach in the main cell.
args = dict(
    load_model=False,        # whether to use a pretrained network
    load_folder_file=None,   # presumably the location of that pretrained model -- TODO confirm
    checkpoint="./temp/",    # checkpoints are stored here
    num_mcts_sims=10000,     # search iterations per move (the recorded run reports 10000 per move)
    c=0.1,                   # presumably the exploration constant of the MCTS score -- TODO confirm
    num_iters=50,            # number of times the NN is updated
    maxlenOfQueue=200000,    # presumably the cap on stored training examples -- TODO confirm
    numItersForTrainExamplesHistory=20,  # presumably how many iterations of examples are kept -- TODO confirm
    numEps=20,               # presumably self-play episodes per iteration -- TODO confirm
)


In [14]:
# Main cell to run the algorithm: create a fresh Game, wrap it in a Coach
# configured with `args`, and execute a single episode.
g = Game()
c = Coach(g, args)
# The recorded run with num_mcts_sims=10000 reports roughly 15 s of search per
# move, so this call is slow; its return value is discarded here.
c.executeEpisode()
# Prints the Coach object itself (whatever its str/repr is), not the episode data.
print(c)

Currently on move  1
total time taken to run 10000 iterations is 15.004494667053223
The probability distribution is  [0, 1, 0, 0, 0, 0, 0]
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0]])
result  0
Currently on move  2
total time taken to run 10000 iterations is 14.67066740989685
The probability distribution is  [1, 0, 0, 0, 0, 0, 0]
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [2, 1, 0, 0, 0, 0, 0]])
result  0
Currently on move  3
total time taken to run 10000 iterations is 14.84201955795288
The probability distribution is  [0, 0, 1, 0, 0, 0, 0]
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [2, 1, 1, 0, 0, 0, 0]])
result  0
Currently on

In [55]:
from my_mcts import my_mcts
from Game import Game
from Coach import p_print
import time

In [62]:
# Hand-built positions used to sanity-check the search.
# Puzzle no 1 - player 2 has to stop a mate in one.
temp_args = dict(
    load_model=False,        # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",    # checkpoints are stored here
    num_mcts_sims=1000,      # search iterations for this puzzle (the recorded run reports 1000)
    c=0.1,
    num_iters=50,            # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Columns are played in turn; in the recorded board player 1 owns columns 2, 3, 4
# of the bottom row and player 2 owns column 1 plus the piece stacked on column 2.
for played_column in (2, 1, 3, 2, 4):
    n_s, r, d, i = temp_game.getNextState(played_column)

p_print(temp_game.env.board)

temp_mcts = my_mcts(temp_game, temp_args)
# NOTE(review): under standard four-in-a-row rules the blocking move would be
# column 5, yet the recorded run puts all probability on column 1 -- verify the
# search and the meaning of the second argument.
temp_mcts.getActionProb(temp_game, 2)


array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0],
       [0, 2, 1, 1, 1, 0, 0]])
total time taken to run 1000 iterations is 0.9335808753967285
The probability distribution is  [0, 1, 0, 0, 0, 0, 0]


[0, 1, 0, 0, 0, 0, 0]

In [63]:
# Lets make some puzzles to test the algorithm
# Puzzle no 2 - player 1 has to stop a mate in one
temp_args = {
    "load_model": False,  # whether to use a pretrained network
    "load_folder_file": None,
    "checkpoint": "./temp/",  # stores checkpoints here
    "num_mcts_sims": 1000,  # search iterations for this puzzle (the recorded run reports 1000)
    "c": 0.1,
    "num_iters": 50,  # number of times the NN is updated
    "maxlenOfQueue": 200000,
    "numItersForTrainExamplesHistory": 20,
    "numEps": 20,
}

temp_game = Game()

# Time only the position setup. perf_counter() is used instead of time.time()
# because the span is about a millisecond, which is at or below the wall
# clock's resolution on some platforms.
start = time.perf_counter()
# Columns are played in turn; in the recorded board player 1 owns columns 0 and 1
# (two pieces on column 1) and player 2 owns columns 2, 3, 4 of the bottom row.
for played_column in (1, 2, 1, 3, 0, 4):
    n_s, r, d, i = temp_game.getNextState(played_column)
print(f"total_time {time.perf_counter() - start}")
p_print(temp_game.env.board)

temp_mcts = my_mcts(temp_game, temp_args)
# NOTE(review): under standard four-in-a-row rules the blocking move would be
# column 5, yet the recorded run puts all probability on column 4 -- verify the
# search and the meaning of the second argument.
temp_mcts.getActionProb(temp_game, 1)


total_time 0.0010039806365966797
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 1, 2, 2, 2, 0, 0]])
total time taken to run 1000 iterations is 0.9545364379882812
The probability distribution is  [0, 0, 0, 0, 1, 0, 0]


[0, 0, 0, 0, 1, 0, 0]

In [60]:
# Lets make some puzzles to test the algorithm
# Puzzle no 3 - player 2 has to find a way to do a mate in one
temp_args = {
    "load_model": False,  # whether to use a pretrained network
    "load_folder_file": None,
    "checkpoint": "./temp/",  # stores checkpoints here
    "num_mcts_sims": 1000,  # search iterations for this puzzle (the recorded run reports 1000)
    "c": 0.1,
    "num_iters": 50,  # number of times the NN is updated
    "maxlenOfQueue": 200000,
    "numItersForTrainExamplesHistory": 20,
    "numEps": 20,
}

temp_game = Game()

# Time only the position setup. perf_counter() is used instead of time.time()
# because the span is far below the wall clock's resolution on some platforms:
# a recorded run of this cell printed "total_time 0.0".
start = time.perf_counter()
# Seven moves are played: the puzzle 2 position plus one more move on column 0.
for played_column in (1, 2, 1, 3, 0, 4, 0):
    n_s, r, d, i = temp_game.getNextState(played_column)
print(f"total_time {time.perf_counter() - start}")
p_print(temp_game.env.board)

temp_mcts = my_mcts(temp_game, temp_args)
# NOTE(review): the saved output for this cell shows a board with only six
# pieces (no second piece on column 0), so it appears to predate the seventh
# move above -- rerun before trusting the recorded distribution.
temp_mcts.getActionProb(temp_game, 2)


total_time 0.0
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 1, 2, 2, 2, 0, 0]])
total time taken to run 1000 iterations is 0.9675066471099854
The probability distribution is  [1, 0, 0, 0, 0, 0, 0]


[1, 0, 0, 0, 0, 0, 0]

In [61]:
# Hand-built positions used to sanity-check the search.
# Puzzle no 4 - player 1 has to find a way to do a mate in one.
temp_args = dict(
    load_model=False,        # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",    # checkpoints are stored here
    num_mcts_sims=1000,      # search iterations for this puzzle (the recorded run reports 1000)
    c=0.1,
    num_iters=50,            # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Columns are played in turn; in the recorded board player 1 owns columns 2, 3, 4
# of the bottom row and player 2 owns columns 0, 1 plus the piece stacked on column 2.
for played_column in (2, 1, 3, 2, 4, 0):
    n_s, r, d, i = temp_game.getNextState(played_column)
p_print(temp_game.env.board)

temp_mcts = my_mcts(temp_game, temp_args)
# NOTE(review): under standard four-in-a-row rules the winning move would be
# column 5, yet the recorded run puts all probability on column 0 -- verify the
# search and the meaning of the second argument.
temp_mcts.getActionProb(temp_game, 1)


array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0],
       [2, 2, 1, 1, 1, 0, 0]])
total time taken to run 1000 iterations is 1.2641141414642334
The probability distribution is  [1, 0, 0, 0, 0, 0, 0]


[1, 0, 0, 0, 0, 0, 0]