In [1]:
import random
from game import Game, Move, Player
from strategies.rl import Q_learing, CustomState,  get_coordinates

class RandomPlayer(Player):
    """Baseline player that proposes a random move without looking at the board."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Draw a random position and a random direction.

        Args:
            game: the current game; not consulted by this player.

        Returns:
            A ``(position, direction)`` pair, where ``position`` is a pair of
            integers each drawn from 0..4 inclusive and ``direction`` is one of
            the four ``Move`` values. No legality check is made here.
        """
        first_coord = random.randint(0, 4)
        second_coord = random.randint(0, 4)
        directions = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        return (first_coord, second_coord), random.choice(directions)


class MyPlayer(Player):
    """Player whose current strategy is a purely random pick.

    The logic is the same as ``RandomPlayer``: the board is never inspected.
    """

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random ``(position, direction)`` pair.

        Both position components are integers in 0..4 inclusive; the direction
        is chosen among TOP, BOTTOM, LEFT and RIGHT. ``game`` is unused.
        """
        position = (random.randint(0, 4), random.randint(0, 4))
        candidate_moves = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        chosen = random.choice(candidate_moves)
        return position, chosen
    
class RLPlayer(Player):
    """Player that selects moves from a value table produced by ``Q_learing``.

    The table is built inside the constructor, so creating an instance runs
    ``Q_learing.train()`` before returning.
    """

    def __init__(self, steps, learning_rate, discount_factor) -> None:
        """Train the agent and keep the table returned by ``train()``.

        Args:
            steps: forwarded to ``Q_learing`` as its first argument.
            learning_rate: forwarded to ``Q_learing`` as its second argument.
            discount_factor: forwarded to ``Q_learing`` as its third argument.
        """
        super().__init__()
        ql = Q_learing(steps, learning_rate, discount_factor)
        # Mapping: state -> {action: value}, where an action unpacks into
        # (from_pos, move) as done in make_move.
        self.value_dictionary = ql.train()

    def make_move(self, game: Game) -> tuple[tuple[int, int], Move]:
        """Pick a move for the current board from the learned table.

        Args:
            game: the game whose board is encoded into a ``CustomState`` key.

        Returns:
            A ``(from_pos, move)`` pair. When the current state has no recorded
            actions, a random pair is returned instead of raising.
        """
        current_state = CustomState(get_coordinates(game.get_board()))

        # A state never visited during training may be missing from the table
        # (KeyError) or map to an empty dict; both used to crash this method.
        try:
            action_values = self.value_dictionary[current_state]
        except KeyError:
            action_values = None

        if not action_values:
            # Fall back to the same random policy the other players use.
            # Assumes the game handles an illegal proposal itself, as it must
            # for those players - TODO confirm.
            from_pos = (random.randint(0, 4), random.randint(0, 4))
            move = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
            return from_pos, move

        # Same pick as sorted(actions, key=values.get)[0]: the first action
        # with the smallest value.
        # NOTE(review): a greedy Q-learning policy normally takes the action
        # with the LARGEST value; confirm against Q_learing whether min is
        # intended before switching to max.
        from_pos, move = min(action_values, key=action_values.get)
        return from_pos, move
    
# Game instance shared by the cells below.
g = Game()

# Building the RL player trains it immediately (see RLPlayer.__init__).
player1 = RLPlayer(steps=5000, learning_rate=0.1, discount_factor=0.7)
# Opponent that proposes random moves.
player2 = RandomPlayer()



  0%|          | 0/5000 [00:00<?, ?it/s]

state(x=[], o=[]):
  (1, 0) Move.RIGHT: 0.0
state(x=[(0, 0)], o=[]):
  (1, 0) Move.RIGHT: 0.0
  (2, 4) Move.TOP: 0.0
state(x=[(0, 0)], o=[(0, 2)]):
  (2, 4) Move.TOP: 0.0
  (0, 4) Move.TOP: 0.0
state(x=[(0, 0), (0, 4)], o=[(0, 2)]):
  (0, 4) Move.TOP: 0.0
  (1, 0) Move.RIGHT: 0.0
state(x=[(0, 0), (0, 3)], o=[(0, 1), (0, 4)]):
  (1, 0) Move.RIGHT: 0.0
  (0, 1) Move.TOP: 0.0
state(x=[(0, 0), (0, 1), (3, 0)], o=[(1, 0), (4, 0)]):
  (0, 1) Move.TOP: 0.0
  (2, 0) Move.BOTTOM: 0.0
state(x=[(0, 0), (0, 1), (3, 0)], o=[(1, 0), (2, 4), (4, 0)]):
  (2, 0) Move.BOTTOM: 0.0
  (3, 0) Move.LEFT: 0.0
state(x=[(0, 0), (0, 1), (1, 0)], o=[(0, 2), (0, 4), (4, 2)]):
  (3, 0) Move.LEFT: 0.0
  (0, 4) Move.RIGHT: 0.0
state(x=[(0, 0), (0, 1), (1, 0)], o=[(0, 2), (0, 4), (4, 1), (4, 4)]):
  (0, 4) Move.RIGHT: 0.0
  (3, 0) Move.RIGHT: 0.0
state(x=[(0, 0), (0, 1), (0, 4), (1, 0)], o=[(0, 2), (0, 3), (4, 1), (4, 4)]):
  (3, 0) Move.RIGHT: 0.0
  (2, 0) Move.BOTTOM: 0.0
state(x=[(0, 0), (0, 1), (0, 4), (1, 0)], o=

In [2]:
# Show the board before the match.
g.print()
# Run the match: player1 (RLPlayer) against player2 (RandomPlayer). The value
# returned by play() is what gets reported as the winner below.
# NOTE(review): the saved output of this cell ends in
# "AttributeError: 'RLPlayer' object has no attribute 'value_dictionary'",
# which suggests player1 came from a stale or interrupted run of the setup
# cell. Restart the kernel and run all cells before trusting this result.
winner = g.play(player1, player2)
# Show the board after the match.
g.print()
print(f"Winner: Player {winner}")

[[-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]
[[-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]
 [-1 -1 -1 -1 -1]]


AttributeError: 'RLPlayer' object has no attribute 'value_dictionary'