In [None]:
import numpy as np
import random
from collections import defaultdict

In [None]:
class MonteCarloTreeSearchNode():
    """Node of a Monte Carlo Tree Search (MCTS) tree.

    The wrapped ``state`` object is expected to provide the members used
    below: ``legals_actions`` (sequence of actions, each with a ``name``),
    ``move(action)`` (returns the next state), ``is_not_terminal_node()``,
    ``game_result()`` and ``possible_state``.

    Rollout outcomes are tallied in ``_results``; outcomes ``1`` and ``-1``
    are combined into ``t_score`` (wins minus losses), which drives the
    exploitation term of the UCB formula.
    NOTE(review): outcomes are scored from a single, fixed perspective at
    every tree level -- confirm this matches the game being modelled.
    """

    def __init__(self, state, parent=None, parent_action=None, name=None):
        """Create a node.

        Args:
            state: game state held by this node.
            parent: parent node, or ``None`` for the root.
            parent_action: action that led from ``parent`` to this node.
            name: optional label (``expand`` uses the action's name).
        """
        self.state = state
        self.parent = parent
        self.name = name
        self.parent_action = parent_action
        self.children = []
        self._number_of_visits = 0
        # Total score of the subtree: (#outcomes == 1) - (#outcomes == -1).
        # Kept up to date by backpropagate().
        self.t_score = 0

        # Outcome -> number of times it was backpropagated through this node.
        self._results = defaultdict(int)
        self._results[1] = 0
        self._results[-1] = 0

    def expand(self):
        """Create one child per legal action and roll each one out once.

        Every new child is simulated immediately so that no child is left
        with zero visits (which would make its UCB value infinite).

        Returns:
            The last child created, or ``None`` when the state has no legal
            action (nothing to expand).
        """
        last_child = None

        for action in self.state.legals_actions:
            next_state = self.state.move(action)
            last_child = MonteCarloTreeSearchNode(
                next_state, parent=self, parent_action=action, name=action.name
            )
            self.children.append(last_child)

            # Roll out right away: with ni = 0 the UCB value would be inf.
            reward = last_child.rollout()
            last_child.backpropagate(reward)

        return last_child

    def rollout(self):
        """Simulate a game from this node's state until the end.

        Returns:
            ``game_result()`` of the terminal state that was reached.
        """
        current_rollout_state = self.state

        while current_rollout_state.is_not_terminal_node():
            possible_moves = current_rollout_state.legals_actions
            action = self.rollout_policy(possible_moves)
            current_rollout_state = current_rollout_state.move(action)

        return current_rollout_state.game_result()

    def rollout_policy(self, possible_moves):
        """Return a uniformly random move (light playout)."""
        return possible_moves[np.random.randint(len(possible_moves))]

    def max_UCB(self):
        """Return the leaf reached by repeatedly following the best UCB child.

        This is the node to expand next.
        """
        current_node = self
        while current_node.children:
            current_node = current_node.best_child()

        return current_node

    def n(self):
        """Return the number of times this node was visited."""
        return self._number_of_visits

    def best_child(self, c_param=2.):
        """Return the child that maximizes the UCB value.

        The value of a child is
        ``child.t_score / child.n() + c_param * sqrt(ln(self.n()) / child.n())``.
        With ``c_param=0`` only the average score (exploitation) is used.

        Args:
            c_param: weight of the exploration term.

        Raises:
            ValueError: if this node has no children.
        """
        if not self.children:
            raise ValueError("best_child() called on a node without children")

        parent_visits = self.n()
        # Guard against log(0); a parent with visited children has n >= 1.
        log_parent = np.log(parent_visits) if parent_visits > 0 else 0.0

        choices_weights = []
        for child in self.children:
            visits = child.n()
            if visits == 0:
                # Unvisited child: UCB is infinite by convention.
                choices_weights.append(float("inf"))
                continue
            exploitation = child.t_score / visits
            exploration = c_param * np.sqrt(log_parent / visits)
            choices_weights.append(exploitation + exploration)

        return self.children[int(np.argmax(choices_weights))]

    def backpropagate(self, result):
        """Record ``result`` on this node and on all of its ancestors.

        Args:
            result: outcome returned by a rollout (``game_result()``).
        """
        node = self
        # Iterative walk to the root: avoids recursion limits on deep trees.
        while node is not None:
            node._number_of_visits += 1
            node._results[result] += 1
            node.t_score = node._results[1] - node._results[-1]
            node = node.parent

    def best_action(self, nb_simulations):
        """Run ``nb_simulations`` MCTS iterations and return the best child.

        Args:
            nb_simulations: number of select/expand iterations to perform.

        Returns:
            The child of this node with the best average score.

        Raises:
            ValueError: if this node ends up without any child
                (e.g. ``nb_simulations`` is 0).
        """
        for i in range(nb_simulations):

            if i % 100 == 0:
                print(f"Simulation {i}")

            node_to_expand = self.max_UCB()

            if node_to_expand.state.possible_state:
                expanded = node_to_expand.expand()
                if expanded is None and not node_to_expand.state.is_not_terminal_node():
                    # Terminal leaf selected again: nothing to expand, so
                    # record its outcome once more to keep the statistics
                    # (and therefore the selection) evolving.
                    node_to_expand.backpropagate(node_to_expand.state.game_result())
            else:
                print("No need to continue this branch")

        return self.best_child(c_param=0)
