In [None]:
'''Import modules'''
from collections import namedtuple, defaultdict
from random import choice, shuffle
from copy import deepcopy
from typing import Type
from itertools import combinations


In [None]:
'''Define utilities'''
# Snapshot of a game: the cell numbers held by X and the cell numbers held by O.
State = namedtuple("State", "X O")

# 3x3 magic square in row-major order: every row, column and diagonal sums
# to 15, which is what the win test relies on.
MAGIC_SQUARE = [
    2, 7, 6,
    9, 5, 1,
    4, 3, 8,
]

In [None]:
'''Player class'''
class Player:
    """A tic-tac-toe player whose move selection is a pluggable strategy.

    A new player picks moves at random; ``set_trained_player`` switches it to
    the greedy value-table strategy. Both strategies accept the same four
    arguments so callers can invoke ``self.strategy`` uniformly.
    """

    def __init__(self, name="RandomPlayer"):
        self.name = name
        self.strategy = self.random_choice

    def random_choice(self, available_moves, our_state=None, opponent_state=None, policies=None):
        """Return a random element of ``available_moves``.

        ``our_state``, ``opponent_state`` and ``policies`` are ignored; they
        exist only so this method is call-compatible with ``trained_player``.
        """
        return choice(available_moves)

    def trained_player(self, available_moves, our_state, opponent_state, policies):
        """Return the available move whose resulting position is rated highest.

        Args:
            available_moves: non-empty sequence of cells still free.
            our_state: cells this player already holds.
            opponent_state: cells the opponent holds.
            policies: mapping from ``(frozenset(own cells),
                frozenset(opponent cells))`` to a numeric rating. Positions
                missing from the mapping are rated 0.0.

        Ties are resolved in favour of the earliest move in
        ``available_moves``.
        """
        opponent_key = frozenset(opponent_state)
        own_cells = frozenset(our_state)

        max_value = float('-inf')
        # Fallback only; any real rating beats -inf and replaces it. Also
        # raises IndexError on an empty move list, like random_choice does.
        best_move = choice(available_moves)

        for move in available_moves:
            # The table key is the *pair* of sets. Passing the opponent set as
            # the second argument of .get() would make it the default value,
            # not part of the key.
            key = (own_cells | {move}, opponent_key)
            # .get() with an explicit default avoids growing a defaultdict
            # with positions that were never visited.
            rating = policies.get(key, 0.0)
            if rating > max_value:
                max_value = rating
                best_move = move

        return best_move

    def set_trained_player(self):
        """Switch this player's strategy to ``trained_player``."""
        self.strategy = self.trained_player

    def __str__(self) -> str:
        return f"{self.name}"

In [None]:
'''Game class'''
class TicTacToe:
    """One game of tic-tac-toe played on a 3x3 magic square.

    Cells are identified by their magic-square number, so a side has
    completed a line exactly when three of its cells add up to 15.
    """

    def __init__(self):
        self.board = MAGIC_SQUARE
        self.available_moves = MAGIC_SQUARE.copy()  # cells not yet taken
        self.X_boxes = []  # cells held by X, in the order they were played
        self.O_boxes = []  # cells held by O, in the order they were played

    def print_board(self):
        """Print the board as a 3x3 grid with X, O or a blank in each cell."""
        for row in range(3):
            if row != 0:
                print("-------------")

            for col in range(3):
                cell = self.board[row * 3 + col]
                if cell in self.X_boxes:
                    char = "X"
                elif cell in self.O_boxes:
                    char = "O"
                else:
                    char = " "
                print(f"| {char}", end=" ")

            print("|")

    def win_condition(self, moves_set):
        """Return True if any three cells of ``moves_set`` sum to 15."""
        return any(sum(c) == 15 for c in combinations(moves_set, 3))

    def _apply_move(self, boxes, move):
        """Give ``move`` to the side owning ``boxes``; return True if it wins."""
        self.available_moves.remove(move)
        boxes.append(move)
        # The win test must run *after* the move is recorded, otherwise a
        # winning move goes unnoticed until that side's next turn.
        return self.win_condition(boxes)

    def play_game(self, playerA, playerB):
        """Play a full game between two players and return its trajectory.

        The players are shuffled to decide who is X; X moves first. Each
        player's ``strategy`` is called with the list of free cells.

        Returns:
            A list with one ``State(X, O)`` snapshot per move played. The
            game stops as soon as a move completes a line or the board is
            full.
        """
        players = [playerA, playerB]
        shuffle(players)
        playerX, playerO = players

        print(f"{playerX} is X, {playerO} is O")

        trajectory = []
        x_to_move = True

        while self.available_moves:
            if x_to_move:
                current_player, boxes = playerX, self.X_boxes
            else:
                current_player, boxes = playerO, self.O_boxes

            move = current_player.strategy(self.available_moves)
            won = self._apply_move(boxes, move)
            trajectory.append(State(self.X_boxes.copy(), self.O_boxes.copy()))

            if won:
                break
            x_to_move = not x_to_move

        return trajectory

    def play_game_trained(self, policies, opponent):
        """Play a value-table player against ``opponent``.

        A fresh ``Player`` using the trained strategy is created here; its
        cells are stored in ``X_boxes`` and the opponent's in ``O_boxes``.
        Who moves first is decided by a shuffle.

        Args:
            policies: rating table handed to the strategies.
            opponent: player whose ``strategy`` accepts
                ``(available_moves, our_state, opponent_state, policies)``.

        Returns:
            1 if the trained player wins, -1 if the opponent wins, 0 for a
            draw.
        """
        playerT = Player("TrainedPlayer")
        playerT.set_trained_player()

        players = [playerT, opponent]
        shuffle(players)

        trained_to_move = players[0] is playerT
        winner = None

        while self.available_moves:
            if trained_to_move:
                move = playerT.strategy(
                    self.available_moves, self.X_boxes, self.O_boxes, policies
                )
                if self._apply_move(self.X_boxes, move):
                    winner = playerT
                    print("TRAINED PLAYER WON")
                    break
            else:
                # The opponent sees the position from its own side: its
                # cells are O_boxes, its adversary's are X_boxes. Passing
                # the full argument list works for either strategy.
                move = opponent.strategy(
                    self.available_moves, self.O_boxes, self.X_boxes, policies
                )
                if self._apply_move(self.O_boxes, move):
                    winner = opponent
                    print("Uuuups, opponent won")
                    break

            # Hand the turn to the other side.
            trained_to_move = not trained_to_move

        if winner is playerT:
            return 1
        if winner is opponent:
            return -1
        return 0




            


In [None]:
# Rating of each (learner cells, opponent cells) position; unseen ones read 0.0.
value_dictionary = defaultdict(float)
# How many times each position has been updated.
hit_state = defaultdict(int)
# Step size used when moving a rating toward an observed reward.
epsilon = 0.05


def update_dict(learner_state, opponent_state, reward):
    """Count a visit to a position and nudge its rating toward ``reward``.

    The position is keyed by the pair of frozensets built from
    ``learner_state`` and ``opponent_state``. Its rating moves a fraction
    ``epsilon`` of the way from the current value to ``reward``.
    """
    key = (frozenset(learner_state), frozenset(opponent_state))
    hit_state[key] += 1
    current = value_dictionary[key]
    value_dictionary[key] = current + epsilon * (reward - current)

In [None]:
# Fill the value table from games between two random players.
playerA = Player("Random1")
playerB = Player("Random2")

for times in range(1_000):
    match = TicTacToe()
    trajectory = match.play_game(playerA, playerB)

    last_state = trajectory[-1]

    # Pick the reward each side receives for this game's outcome.
    if match.win_condition(last_state.X):
        print("X won")
        x_reward, o_reward = 1, -1
    elif match.win_condition(last_state.O):
        print("O won")
        # The loser must be penalised here too: X gets -1, not +1.
        x_reward, o_reward = -1, 1
    else:
        print("It is a draw")
        x_reward = o_reward = 0.5

    # Every position of the game is credited from both sides' point of view.
    for state in trajectory:
        update_dict(state.X, state.O, x_reward)
        update_dict(state.O, state.X, o_reward)

    print()

print(value_dictionary)

In [None]:
# Evaluate the learned value table over 1 000 games.
# NOTE(review): playerC is not used by this evaluation, because
# play_game_trained creates its own trained player. The definition is kept in
# case another cell refers to it; confirm and remove if not.
playerC = Player("TrainedPlayer")
playerC.set_trained_player()

# The argument of play_game_trained is the opponent the trained player is
# measured against, so a random baseline is passed rather than a second
# trained player.
random_opponent = Player("RandomOpponent")

draws = 0
trained_player_victories = 0
opponent_player_victories = 0

for times in range(1_000):
    match = TicTacToe()

    # 1 = trained player won, -1 = opponent won, anything else = draw.
    result = match.play_game_trained(value_dictionary, random_opponent)

    if result == 1:
        trained_player_victories += 1
    elif result == -1:
        opponent_player_victories += 1
    else:
        draws += 1

print(f"Number of matches won by TrainedPlayer: {trained_player_victories}")
print(f"Number of matches won by Opponent: {opponent_player_victories}")
print(f"Number of draw matches : {draws}")