In [1]:
import numpy as np
np.__version__

'1.23.2'

In [2]:
class TicTacToe:
    """Rules engine for a 3x3 game of tic-tac-toe.

    The board is a NumPy array of shape ``(row_count, column_count)``.
    Empty cells hold ``0``; occupied cells hold the value of the player
    that moved there. The two players are negations of each other (see
    ``get_opponent``), e.g. ``1`` and ``-1``.

    Actions are flat cell indices in ``range(action_size)``, numbered row by
    row starting from the top-left corner.
    """

    def __init__(self):
        """Set the board dimensions and the size of the action space."""
        self.row_count = 3  # number of rows on the board
        self.column_count = 3  # number of columns on the board
        # One action per cell, so the action space is rows * columns.
        self.action_size = self.row_count * self.column_count

    def get_initial_state(self):
        """Return a new, empty board (all zeros)."""
        return np.zeros((self.row_count, self.column_count))

    def get_next_state(self, state, action, player):
        """Place ``player``'s mark on the cell addressed by ``action``.

        NOTE: ``state`` is modified in place and the same array is returned;
        pass a copy if the previous position must be preserved. No legality
        check is performed here -- callers are expected to consult
        ``get_valid_moves`` first.
        """
        row = action // self.column_count  # flat index -> row
        column = action % self.column_count  # flat index -> column
        state[row, column] = player
        return state

    def get_valid_moves(self, state):
        """Return a flat ``uint8`` mask with 1 for every empty cell, else 0."""
        return (state.reshape(-1) == 0).astype(np.uint8)

    def check_win(self, state, action):
        """Return True if the mark on cell ``action`` completes a line.

        The player is read from the board at ``action``, so this is meant to
        be called right after that move has been applied. If the cell is
        empty there is no move to evaluate and the result is False.
        """
        row = action // self.column_count
        column = action % self.column_count
        player = state[row, column]  # whoever occupies the queried cell

        # Without this guard an empty cell gives player == 0, and every line
        # whose marks sum to zero (e.g. any line of an empty board) would be
        # reported as a win.
        if player == 0:
            return False

        # Cells hold only 0 or +/-player, so a line sums to player * n
        # exactly when all n of its cells belong to that player.
        return bool(
            np.sum(state[row, :]) == player * self.column_count  # row of the move
            or np.sum(state[:, column]) == player * self.row_count  # column of the move
            or np.sum(np.diag(state)) == player * self.row_count  # main diagonal
            or np.sum(np.diag(np.flip(state, axis=0))) == player * self.row_count  # anti-diagonal
        )

    def get_value_and_terminated(self, state, action):
        """Return ``(value, is_terminal)`` for the position after ``action``.

        ``(1, True)`` means the player who made ``action`` has won,
        ``(0, True)`` means the board is full with no winner (draw), and
        ``(0, False)`` means the game continues.
        """
        if self.check_win(state, action):
            return 1, True
        if np.sum(self.get_valid_moves(state)) == 0:
            return 0, True
        return 0, False

    def get_opponent(self, player):
        """Return the other player, i.e. the negation of ``player``."""
        return -player

In [3]:
tictactoe = TicTacToe()
player = 1  # player 1 moves first; get_opponent() flips the sign each turn

state = tictactoe.get_initial_state()

# Interactive two-player game: show the board, read a move, apply it, and
# stop as soon as the move wins the game or fills the board.
while True:
    print(state)
    valid_moves = tictactoe.get_valid_moves(state)
    print("valid_moves", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])

    # Non-numeric input must not abort the game; ask the same player again.
    try:
        action = int(input(f"{player}:"))
    except ValueError:
        print("action not valid")
        continue

    # Range-check before indexing: too-large values would raise IndexError,
    # and negative values would be accepted by NumPy as indices counted
    # from the end of the array (e.g. -1 would silently play cell 8).
    if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:
        print("action not valid")
        continue

    state = tictactoe.get_next_state(state, action, player)

    value, is_terminal = tictactoe.get_value_and_terminated(state, action)

    if is_terminal:
        print(state)
        if value == 1:
            print(player, "won")
        else:
            print("draw")
        break

    player = tictactoe.get_opponent(player)



[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
valid_moves [0, 1, 2, 3, 4, 5, 6, 7, 8]
[[0. 1. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
valid_moves [0, 2, 3, 4, 5, 6, 7, 8]
[[ 0.  1.  0.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
valid_moves [0, 2, 3, 5, 6, 7, 8]
[[ 1.  1.  0.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
valid_moves [2, 3, 5, 6, 7, 8]
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
valid_moves [3, 5, 6, 7, 8]
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [ 0.  0.  1.]]
valid_moves [3, 5, 6, 7]
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [-1.  0.  1.]]
-1 won
