In [1]:
import numpy as np

In [41]:
class Node():
    
    def __init__(self, state, depth, maximiser, player, parent=None,):
        self.state = state
        self.children = list()
        self.score = 0
        self.depth = depth
        self.maximiser = maximiser
        self.player = player
        self.best_child = None
        
        if parent is not None:
            parent.children.append(self)
        
    def __hash__(self):
        
        return hash(str(self.str))
    
    

In [138]:
class Environment():
    
    def __init__(self, start_state=None):
        
        if start_state is None:
            self.start_state = np.array([['.','.','.'],['.','.','.'],['.','.','.']])
        else:
            self.start_state = start_state
    def get_moves(self, state, player):
        
        new_states = []
        spaces = []
        for i in range(3):
            for j in range(3):
                if state[i][j]=='.':
                    new_state = state.copy()
                    new_state[i,j] = player
                    new_states.append(new_state)
        
        return new_states
    
    def check_terminal(self, state):
        for i in range(3):
            for j in range(3):
                if state[i][j]=='.':
                    return False
        
        return True
    
    def evaluate(self, state):
        
        for val in range(3):
            if state[val,0] == state[val,1] == state[val,2]!='.':
                if state[val, 0]=='x':
                    return 10
                else:
                    return -10
            
            if state[0,val] == state[1,val] == state[2,val]!='.':
                if state[0,val]=='x':
                    return 10
                else:
                    return -10
        
        if state[0,0] == state[1,1] == state[2,2]!='.':
            if state[0,0]=='x':
                return 10
            else:
                return -10
        
        if state[0,2] == state[1,1] == state[2,0]!='.':
            if state[0,2]=='x':
                return 10
            else:
                return -10
        
        return 0

    def get_start_state(self):
        return self.start_state


In [158]:
class Agent():
    
    def __init__(self, env):
        
        self.env = env
        self.start_state = env.get_start_state()
        self.root_node = None
        self.neginf = -10**18
        self.posinf = 10**18
    
    
    def minimax(self, node):
        
        score = self.env.evaluate(node.state)
        if score!=0:
            node.score = score
            return node
        
        if self.env.check_terminal(node.state):
            node.score = 0
            return node
        
        if node.maximiser:
            
            best_score = self.neginf
            best_depth = self.posinf
            next_moves = self.env.get_moves(node.state, node.player)
            for move in next_moves:
                child = Node(state = move, depth=node.depth+1, 
                             maximiser=not node.maximiser, player='o', parent=node)
                
                child= self.minimax(child)
                node.children.append(child)
                
                if best_score<child.score and child.depth<best_depth:
                    best_score = child.score
                    best_depth = child.depth
                    node.best_child = child
                    node.depth = best_depth
                    node.score = best_score
            
            return node
        
        else:
            best_score = self.posinf
            best_depth = self.posinf
            next_moves = self.env.get_moves(node.state, node.player)
            
            for move in next_moves:
                child = Node(state = move, depth=node.depth+1, 
                             maximiser=not node.maximiser, player='x', parent=node)
                
                child = self.minimax(child)
                node.children.append(child)
                
                
                if best_score>child.score and child.depth<best_depth:
                    best_score = child.score
                    best_depth = child.depth
                    node.best_child = child
            
            node.depth = best_depth
            node.score = best_score
            
            return node

    def run(self):
        
        self.root_node = Node(state=self.start_state, depth=0, maximiser=True,
                             player='x', parent=None)
        
        self.root_node = self.minimax(self.root_node)
        
    def print_nodes(self):
        
        node = self.root_node
        
        while node is not None:
            print(node.state)
            node = node.best_child

In [159]:
start_state = np.array([['o','x','o'],['.','x','.'],['.','.','.']])
env = Environment(start_state = start_state)
agent = Agent(env)
agent.run()

In [160]:
agent.print_nodes()

[['o' 'x' 'o']
 ['.' 'x' '.']
 ['.' '.' '.']]
[['o' 'x' 'o']
 ['x' 'x' '.']
 ['.' '.' '.']]
[['o' 'x' 'o']
 ['x' 'x' 'o']
 ['.' '.' '.']]
[['o' 'x' 'o']
 ['x' 'x' 'o']
 ['x' '.' '.']]
[['o' 'x' 'o']
 ['x' 'x' 'o']
 ['x' '.' 'o']]


In [157]:
for i in agent.root_node.children:
    print(i.state, i.depth)

[['o' 'x' 'o']
 ['x' 'x' '.']
 ['.' '.' '.']] 4
[['o' 'x' 'o']
 ['x' 'x' '.']
 ['.' '.' '.']] 4
[['o' 'x' 'o']
 ['.' 'x' 'x']
 ['.' '.' '.']] 9
[['o' 'x' 'o']
 ['.' 'x' 'x']
 ['.' '.' '.']] 9
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['x' '.' '.']] 8
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['x' '.' '.']] 8
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['.' 'x' '.']] 5
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['.' 'x' '.']] 5
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['.' '.' 'x']] 8
[['o' 'x' 'o']
 ['.' 'x' '.']
 ['.' '.' 'x']] 8


In [150]:
state = np.array([['o','x','o'],['.','x','.'],['.','x','.']])
env.evaluate(state)

10