In [1]:
import numpy as np
import math
from simplified_connectx import *
%load_ext autoreload
%autoreload 2

##### In MCTS we store 1 tree

##### MCTS variant: instead of letting UCB_score be infinite, which forces the
##### algorithm to explore the whole space, we can "guide" it with NN predictions,
##### for example p*(math.sqrt(self.total_counts))/(self.SA_counts[(s,action)]),
##### where p is the probability of picking that action according to the NN.

In [2]:
import logging

import coloredlogs

from Coach import Coach
from Game import Game
%autoreload 2

In [85]:
# Settings handed to Coach in the main cell.
args = dict(
    load_model=False,                    # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",                # stores checkpoints here
    num_mcts_sims=10000,                 # presumably MCTS iterations per move -- TODO confirm
    c=0.1,
    num_iters=50,                        # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)


In [86]:
# Main cell to run the algorithm
# Build a fresh Game and a Coach configured with `args`, run a single episode,
# then print the Coach object.
g = Game()
c = Coach(g, args)
c.executeEpisode()
print(c)

Currently on move  1
total time taken to run 10000 iterations is 3.360729217529297
The probability distribution is  [0, 0, 0, 1, 0, 0, 0]
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0]])
result  0
Currently on move  2


In [3]:
from my_mcts import my_mcts
from Game import Game
from Coach import p_print
import time

In [14]:
# Let's build some puzzles to test the algorithm.
# Puzzle no 1 - player 2 has to stop a mate in one.
temp_args = dict(
    load_model=False,                    # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",                # stores checkpoints here
    num_mcts_sims=1000,
    c=0.1,
    num_iters=50,                        # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Play the setup moves in order; each call applies the next move to temp_game.
for setup_move in (2, 1, 3, 2, 4):
    n_s, r, d, i = temp_game.getNextState(setup_move)

p_print(temp_game.env.board)

# Run the search from this position; the last expression is the cell's output.
temp_mcts = my_mcts(temp_game, temp_args)
temp_mcts.getActionProb(temp_game, 2)


array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0],
       [0, 2, 1, 1, 1, 0, 0]])
total time taken to run 1000 iterations is 0.8499479293823242
The probability distribution is  [0, 0, 0, 0, 0, 1, 0]


[0, 0, 0, 0, 0, 1, 0]

In [21]:
# Let's build some puzzles to test the algorithm.
# Puzzle no 2 - player 1 has to stop a mate in one.
# NOTE(review): the recorded output of this cell puts all probability on
# column 1, while the row of 2s in columns 2-4 is still open at column 5 --
# confirm whether the search really finds the block here.
temp_args = dict(
    load_model=False,                    # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",                # stores checkpoints here
    num_mcts_sims=1000,
    c=0.1,
    num_iters=50,                        # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Time how long it takes to play the setup moves.
start = time.time()
for setup_move in (1, 2, 1, 3, 0, 4):
    n_s, r, d, i = temp_game.getNextState(setup_move)
print(f"total_time {time.time() - start}")
p_print(temp_game.env.board)

# Run the search from this position; the last expression is the cell's output.
temp_mcts = my_mcts(temp_game, temp_args)
temp_mcts.getActionProb(temp_game, 1)


total_time 0.0010619163513183594
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 1, 2, 2, 2, 0, 0]])
total time taken to run 1000 iterations is 0.24378585815429688
The probability distribution is  [0, 1, 0, 0, 0, 0, 0]


[0, 1, 0, 0, 0, 0, 0]

In [11]:
# Let's build some puzzles to test the algorithm.
# Puzzle no 3 - player 2 has to find a way to do a mate in one.
temp_args = dict(
    load_model=False,                    # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",                # stores checkpoints here
    num_mcts_sims=3000,
    c=0.1,
    num_iters=50,                        # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Time how long it takes to play the setup moves.
start = time.time()
for setup_move in (1, 2, 1, 3, 0, 4, 0):
    n_s, r, d, i = temp_game.getNextState(setup_move)
print(f"total_time {time.time() - start}")
p_print(temp_game.env.board)

# Run the search from this position; the last expression is the cell's output.
temp_mcts = my_mcts(temp_game, temp_args)
temp_mcts.getActionProb(temp_game, 2)


total_time 0.0
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 0, 0, 0, 0],
       [1, 1, 2, 2, 2, 0, 0]])
total time taken to run 3000 iterations is 0.23569989204406738
The probability distribution is  [0, 0, 0, 0, 0, 1, 0]


[0, 0, 0, 0, 0, 1, 0]

In [64]:
# Let's build some puzzles to test the algorithm.
# Puzzle no 4 - player 1 has to find a way to do a mate in one.
temp_args = dict(
    load_model=False,                    # whether to use a pretrained network
    load_folder_file=None,
    checkpoint="./temp/",                # stores checkpoints here
    num_mcts_sims=1000,
    c=0.1,
    num_iters=50,                        # number of times the NN is updated
    maxlenOfQueue=200000,
    numItersForTrainExamplesHistory=20,
    numEps=20,
)

temp_game = Game()

# Play the setup moves in order; each call applies the next move to temp_game.
for setup_move in (2, 1, 3, 2, 4, 0):
    n_s, r, d, i = temp_game.getNextState(setup_move)
p_print(temp_game.env.board)

# Run the search from this position; the last expression is the cell's output.
temp_mcts = my_mcts(temp_game, temp_args)
temp_mcts.getActionProb(temp_game, 1)


array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0],
       [2, 2, 1, 1, 1, 0, 0]])
total time taken to run 1000 iterations is 0.07547116279602051
The probability distribution is  [0, 0, 0, 0, 0, 1, 0]


[0, 0, 0, 0, 0, 1, 0]

In [65]:
# Sanity check for the algorithm
from collections import deque,defaultdict
def convert_string_to_board(string):
    '''
    Convert a string of digits into a flat list of ints, one entry per character.
    '''
    return list(map(int, string))

def print_state_graph(root_game):
    '''
    Breadth-first walk over the states reachable from root_game through
    (state, action) pairs recorded in the global temp_mcts.SA_counts.

    root_game is only copied, never mutated. Each child is produced by
    copying the state currently being expanded and applying the action to
    that copy with getNextState.

    Returns a defaultdict mapping a parent's string representation to a deque
    of (action, child string representation) pairs. States without any
    recorded child action get no entry. Nothing is printed.
    '''
    root = root_game.create_copy()
    frontier = deque([root])
    # Different move orders can reach the same position; expand each state once.
    seen = {root.stringRepresentation()}
    tree = defaultdict(deque)
    while frontier:
        cur = frontier.popleft()
        cur_str = cur.stringRepresentation()
        # NOTE(review): range() requires get_possible_actions() to return an
        # int (the number of actions) -- confirm against Game.
        for i in range(cur.get_possible_actions()):
            if (cur_str, i) not in temp_mcts.SA_counts:
                continue
            # Expand from the state being visited, not from the root.
            child = cur.create_copy()
            child.getNextState(i)
            child_str = child.stringRepresentation()
            tree[cur_str].append((i, child_str))
            if child_str not in seen:
                seen.add(child_str)
                frontier.append(child)
    return tree
                
# Show the current board of temp_game.
p_print(temp_game.env.board)
# Look up the S_counts entry for the current position, keyed by its string
# representation (presumably the number of MCTS visits to it -- TODO confirm).
base_state = temp_mcts.S_counts[temp_game.stringRepresentation()]
print(base_state)

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0],
       [2, 2, 1, 1, 1, 0, 0]])
1000
