In [None]:
class TicTacToe:
    """A 3x3 tic-tac-toe board stored as a flat, row-major list of nine cells.

    A cell holds ' ' while empty and the mover's letter once taken. Square
    index ``i`` lives in row ``i // 3`` and column ``i % 3``.
    """

    def __init__(self):
        self.board = [' ' for _ in range(9)]  # 3x3 board, row-major
        self.current_winner = None  # letter of the winner once a line is completed

    def print_board(self):
        """Print the current board, one row per line."""
        for start in range(0, 9, 3):
            print('| ' + ' | '.join(self.board[start:start + 3]) + ' |')

    @staticmethod
    def print_board_nums():
        """Print the index of every square so players know what to type.

        | 0 | 1 | 2 |
        | 3 | 4 | 5 |
        | 6 | 7 | 8 |
        """
        for j in range(3):
            print('| ' + ' | '.join(str(i) for i in range(j * 3, (j + 1) * 3)) + ' |')

    def available_moves(self):
        """Return the indices of all empty squares, in ascending order."""
        return [i for i, spot in enumerate(self.board) if spot == ' ']

    def empty_squares(self):
        """Return True while at least one square is still empty."""
        return ' ' in self.board

    def num_empty_squares(self):
        """Return how many squares are still empty."""
        return self.board.count(' ')

    def _is_valid_square(self, square):
        """Return True when ``square`` is a real board index (0..8)."""
        return 0 <= square < len(self.board)

    def make_move(self, square, letter):
        """Place ``letter`` on ``square`` if the move is legal.

        Returns True when the move was made (updating ``current_winner`` if it
        completed a line) and False when the square is occupied or is not on
        the board.
        """
        # Negative indices would otherwise wrap around to the end of the list
        # and be treated as a legal move on a different square.
        if not self._is_valid_square(square) or self.board[square] != ' ':
            return False

        self.board[square] = letter
        if self.winner(square, letter):
            self.current_winner = letter
        return True

    def winner(self, square, letter):
        """Return True if ``letter`` owns a full line after playing ``square``.

        Checks the row and column through ``square`` and, for even-numbered
        squares (corners and centre), both diagonals. An off-board ``square``
        never wins.
        """
        # Without this guard a negative square yields an empty row slice and
        # all() of an empty sequence is True.
        if not self._is_valid_square(square):
            return False

        # check the row
        row_ind = square // 3
        row = self.board[row_ind * 3:(row_ind + 1) * 3]
        if all(spot == letter for spot in row):
            return True

        # check the column
        col_ind = square % 3
        column = [self.board[col_ind + i * 3] for i in range(3)]
        if all(spot == letter for spot in column):
            return True

        # check diagonals: only even squares (0, 2, 4, 6, 8) lie on one
        if square % 2 == 0:
            diagonal1 = [self.board[i] for i in [0, 4, 8]]
            if all(spot == letter for spot in diagonal1):
                return True
            diagonal2 = [self.board[i] for i in [2, 4, 6]]
            if all(spot == letter for spot in diagonal2):
                return True

        return False

In [None]:
def play(game, x_player, o_player, print_game=True):
    """Run one game between two players, X moving first.

    Each player object must expose ``get_move(game)``. A move rejected by
    ``game.make_move`` is simply requested again from the same player.

    Returns the winning letter, or None when the board fills with no winner.
    """
    if print_game:
        game.print_board_nums()

    movers = {'X': x_player, 'O': o_player}
    letter = 'X'  # starting letter

    # keep going for as long as there is an empty square
    while game.empty_squares():
        square = movers[letter].get_move(game)

        # rejected move: same player tries again
        if not game.make_move(square, letter):
            continue

        if print_game:
            print(f'{letter} makes a move to square {square}')
            game.print_board()
            print('')  # empty line

        # a completed line ends the game immediately
        if game.current_winner:
            if print_game:
                print(f'{letter} wins!')
            return letter

        # hand the turn to the other player
        letter = 'X' if letter == 'O' else 'O'

    if print_game:
        print("It's a tie!")
    return None

In [None]:
import random

class HumanPlayer:
    """Player whose moves are typed in at the console."""

    def __init__(self, letter):
        self.letter = letter

    def get_move(self, game):
        """Prompt until the user enters the index of an empty square.

        Non-integer input and squares not listed by ``game.available_moves()``
        are both rejected with the same message. Returns the chosen index.
        """
        while True:
            answer = input(self.letter + "'s turn. Input move (0-8):")
            try:
                candidate = int(answer)
                if candidate in game.available_moves():
                    return candidate
            except ValueError:
                pass  # not a number: fall through to the retry message
            print('Invalid square. Try again.')

class RandomComputerPlayer:
    """Player that picks uniformly at random among the empty squares."""

    def __init__(self, letter):
        self.letter = letter

    def get_move(self, game):
        """Return a randomly chosen index from ``game.available_moves()``."""
        return random.choice(game.available_moves())

if __name__ == '__main__':
    # Interactive demo: a human plays X against a random-move computer playing O.
    t = TicTacToe()
    x_player = HumanPlayer('X')
    o_player = RandomComputerPlayer('O')
    play(game=t, x_player=x_player, o_player=o_player)

| 0 | 1 | 2 |
| 3 | 4 | 5 |
| 6 | 7 | 8 |
X's turn. Input move (0-8):4
X makes a move to square 4
|   |   |   |
|   | X |   |
|   |   |   |

O makes a move to square 8
|   |   |   |
|   | X |   |
|   |   | O |

X's turn. Input move (0-8):1
X makes a move to square 1
|   | X |   |
|   | X |   |
|   |   | O |

O makes a move to square 6
|   | X |   |
|   | X |   |
| O |   | O |

X's turn. Input move (0-8):7
X makes a move to square 7
|   | X |   |
|   | X |   |
| O | X | O |

X wins!


In [None]:
import collections

class QLearningPlayer:
    """Tabular Q-learning agent with epsilon-greedy move selection.

    ``q_table[state][action]`` holds the current value estimate, where a state
    is the board as a tuple and an action is a square index. The agent
    remembers the last (state, action) it chose so that a later call to
    ``update_q_table`` can credit that choice.
    """

    def __init__(self, letter, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.letter = letter
        # Unseen (state, action) pairs read as 0.0 through the nested defaultdicts.
        self.q_table = collections.defaultdict(lambda: collections.defaultdict(float))
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount applied to the next state's best value
        self.epsilon = epsilon  # probability of choosing a random move
        self.previous_state = None
        self.previous_action = None

    def get_state_representation(self, board):
        """Return the board list as a hashable tuple usable as a Q-table key."""
        return tuple(board)

    def get_move(self, game):
        """Choose a move epsilon-greedily and remember it for the next update.

        With probability ``epsilon`` a random legal move is returned; otherwise
        one of the highest-valued legal moves (ties broken at random).
        """
        state = self.get_state_representation(game.board)
        available_moves = game.available_moves()

        # Register every legal action for this state (at 0.0 if unseen) no
        # matter which branch runs below, so that max() over the state's values
        # in update_q_table never overlooks an untried action.
        state_actions = self.q_table[state]
        q_values = {move: state_actions[move] for move in available_moves}

        # Epsilon-greedy approach
        if random.random() < self.epsilon:
            # Explore: choose a random move
            move = random.choice(available_moves)
        else:
            # Exploit: choose the move with the highest Q-value
            max_q_value = max(q_values.values())
            best_moves = [m for m, q_value in q_values.items() if q_value == max_q_value]
            move = random.choice(best_moves)  # Handle ties randomly

        self.previous_state = state
        self.previous_action = move
        return move

    def update_q_table(self, reward, next_state_representation):
        """Apply one Q-learning update to the remembered (state, action).

        Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a)), where
        s' is ``next_state_representation`` and a state with no recorded
        actions contributes 0.0. Does nothing to the table when no move is
        remembered. The remembered move is always cleared afterwards.
        """
        if self.previous_state is not None and self.previous_action is not None:
            # Use .get so that merely looking at the next state does not insert
            # an empty entry for it into the table.
            next_actions = self.q_table.get(next_state_representation)
            max_next_q = max(next_actions.values()) if next_actions else 0.0

            current_q = self.q_table[self.previous_state][self.previous_action]
            new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
            self.q_table[self.previous_state][self.previous_action] = new_q

        self.previous_state = None
        self.previous_action = None

In [None]:
def train_q_player(player, num_episodes=10000):
    """Train ``player`` by playing ``num_episodes`` games against a random opponent.

    The learner keeps the letter it was created with; the other letter is
    played by a ``RandomComputerPlayer``. X always moves first.

    The update for each learner move is deferred until the outcome of that
    move is known:
      * when the learner is about to move again, its previous move is updated
        with reward 0 and bootstrapped from the board it now faces;
      * when the game ends (on either side's move), the learner's last move is
        updated with the final reward: 1 for a win, -1 for a loss, 0.5 for a
        tie.

    ``player`` must provide ``letter``, ``get_move(game)``,
    ``get_state_representation(board)`` and ``update_q_table(reward, state)``,
    and must keep its last choice until ``update_q_table`` is called.
    """
    opponent_letter = 'O' if player.letter == 'X' else 'X'
    opponent = RandomComputerPlayer(opponent_letter)

    for _ in range(num_episodes):
        game = TicTacToe()

        # Forget any move remembered from an earlier game (e.g. one played
        # outside training) so it cannot be credited with this game's rewards.
        player.previous_state = None
        player.previous_action = None
        awaiting_update = False  # True while the learner's last move is uncredited

        letter = 'X'
        while game.empty_squares():
            learner_to_move = letter == player.letter

            if learner_to_move:
                if awaiting_update:
                    # The opponent has replied and the game goes on: settle the
                    # previous move against the position the learner now faces.
                    player.update_q_table(0, player.get_state_representation(game.board))
                    awaiting_update = False
                square = player.get_move(game)
            else:
                square = opponent.get_move(game)

            if not game.make_move(square, letter):
                continue  # rejected move: the same side chooses again

            if learner_to_move:
                awaiting_update = True

            if game.current_winner or not game.empty_squares():
                # Terminal position, reached by either side's move. The learner
                # still remembers its last move, so the outcome lands on it.
                if game.current_winner == player.letter:
                    reward = 1
                elif game.current_winner:
                    reward = -1
                else:
                    reward = 0.5  # Tie
                player.update_q_table(reward, player.get_state_representation(game.board))
                break

            letter = 'O' if letter == 'X' else 'X'

In [None]:
# Build a Q-learning agent that plays X and train it with the default episode count
q_player = QLearningPlayer('X')
train_q_player(q_player)

# Dump the learned table: one "State" line per state, then one line per recorded action
report = ["Final Q-table:"]
for state, actions in q_player.q_table.items():
    report.append(f"State: {state}")
    report.extend(f"  Action {action}: {q_value}" for action, q_value in actions.items())
print("\n".join(report))

Final Q-table:
State: (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ')
  Action 0: 0.0
  Action 1: 0.0
  Action 2: 0.0
  Action 3: 0.0
  Action 4: 0.0
  Action 5: 0.0
  Action 6: 0.0
  Action 7: 0.0
  Action 8: 0.0
State: (' ', ' ', ' ', ' ', ' ', ' ', 'X', ' ', ' ')
State: (' ', ' ', ' ', ' ', ' ', ' ', 'X', ' ', 'O')
  Action 0: 0.0
  Action 1: 0.0
  Action 2: 0.0
  Action 3: 0.0
  Action 4: 0.0
  Action 5: 0.0
  Action 7: 0.0
State: (' ', 'X', ' ', ' ', ' ', ' ', 'X', ' ', 'O')
State: (' ', 'X', ' ', ' ', ' ', ' ', 'X', 'O', 'O')
  Action 2: 0.0
  Action 0: 0.0
  Action 3: 0.0
  Action 4: 0.0
  Action 5: 0.0
State: (' ', 'X', 'X', ' ', ' ', ' ', 'X', 'O', 'O')
State: (' ', 'X', 'X', 'O', ' ', ' ', 'X', 'O', 'O')
  Action 0: 0.19
  Action 4: 0.0
  Action 5: 0.0
State: ('X', 'X', 'X', 'O', ' ', ' ', 'X', 'O', 'O')
State: (' ', ' ', 'X', ' ', ' ', ' ', ' ', ' ', ' ')
State: (' ', ' ', 'X', ' ', 'O', ' ', ' ', ' ', ' ')
  Action 0: 0.0
  Action 1: 0.0
  Action 3: 0.0
  Action 5: 0.0
  Acti

In [None]:
# Tabular view of the Q-table: one row per state, one column per square (0-8)
print("\nQ-table as State-Action Matrix:")
all_actions = range(9)

# Header row: the state label followed by one label per action
header = ["State", *(f"Action {a}" for a in all_actions)]
print("\t".join(header))

# One tab-separated row per state; actions never recorded for a state show as 0.0
for state, actions in q_player.q_table.items():
    row = [str(state)]
    row.extend(f"{actions.get(action, 0.0):.4f}" for action in all_actions)  # 4 decimals for readability
    print("\t".join(row))


Q-table as State-Action Matrix:
State	Action 0	Action 1	Action 2	Action 3	Action 4	Action 5	Action 6	Action 7	Action 8
(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', ' ', ' ', ' ', ' ', ' ', 'X', ' ', ' ')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', ' ', ' ', ' ', ' ', ' ', 'X', ' ', 'O')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', 'X', ' ', ' ', ' ', ' ', 'X', ' ', 'O')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', 'X', ' ', ' ', ' ', ' ', 'X', 'O', 'O')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', 'X', 'X', ' ', ' ', ' ', 'X', 'O', 'O')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', 'X', 'X', 'O', ' ', ' ', 'X', 'O', 'O')	0.1900	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
('X', 'X', 'X', 'O', ' ', ' ', 'X', 'O', 'O')	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000	0.0000
(' ', ' 

In [None]:
# Interactive game: the trained agent plays X (and therefore moves first), a human plays O
if __name__ == '__main__':
    # Reuse the agent trained earlier rather than training a new one
    q_player_X = q_player
    human_player_O = HumanPlayer('O')
    t = TicTacToe()

    print("Let's play against the Q-learning agent!")
    play(game=t, x_player=q_player_X, o_player=human_player_O)

Let's play against the Q-learning agent!
| 0 | 1 | 2 |
| 3 | 4 | 5 |
| 6 | 7 | 8 |
X makes a move to square 6
|   |   |   |
|   |   |   |
| X |   |   |

O's turn. Input move (0-8):4
O makes a move to square 4
|   |   |   |
|   | O |   |
| X |   |   |

X makes a move to square 3
|   |   |   |
| X | O |   |
| X |   |   |

O's turn. Input move (0-8):0
O makes a move to square 0
| O |   |   |
| X | O |   |
| X |   |   |

X makes a move to square 8
| O |   |   |
| X | O |   |
| X |   | X |

O's turn. Input move (0-8):7
O makes a move to square 7
| O |   |   |
| X | O |   |
| X | O | X |

X makes a move to square 5
| O |   |   |
| X | O | X |
| X | O | X |

O's turn. Input move (0-8):1
O makes a move to square 1
| O | O |   |
| X | O | X |
| X | O | X |

O wins!
