In [None]:
import random
import dill
from game import Game, Move, Player
from strategies.rl import Q_learing, CustomState,  get_coordinates

class RandomPlayer(Player):
    """Baseline player: proposes a random cell and a random slide direction."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random ``(from_pos, move)`` pair; ``game`` is not inspected."""
        # Draw order (two coordinates, then the direction) matches the RNG
        # consumption a seeded run would expect.
        first = random.randint(0, 4)
        second = random.randint(0, 4)
        directions = [Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT]
        return (first, second), random.choice(directions)


class MyPlayer(Player):
    """Placeholder player; its move selection is currently purely random."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Pick two coordinates in 0..4 and one of the four slide directions."""
        coordinates = tuple(random.randint(0, 4) for _ in range(2))
        slide = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        return coordinates, slide
    
class RLPlayer(Player):
    """Player that selects moves from a Q-table produced by ``Q_learing``.

    ``self.value_dictionary`` maps a ``CustomState`` to a mapping of action
    strings of the form ``"<from_pos>-<Move member>"`` to values. States that
    are missing from the table (or have no recorded action) are played at random.

    Args:
        learning_rate: forwarded to ``Q_learing`` when ``train`` is true.
        discount_factor: forwarded to ``Q_learing`` when ``train`` is true.
        pretrain_path: when training, forwarded to ``Q_learing``; otherwise the
            dill file the table is loaded from.
        save_model_path: when training, file where ``steps`` and the table are dumped.
        max_steps: forwarded to ``Q_learing`` when ``train`` is true.
        train: run ``Q_learing.train()`` in the constructor instead of loading.
    """

    def __init__(self, learning_rate=0.1, discount_factor=0.7, pretrain_path=None, save_model_path=None, max_steps=None, train=False) -> None:
        super().__init__()
        # Always define the table: without it, an instance created with neither
        # ``train`` nor ``pretrain_path`` crashed with AttributeError in make_move().
        self.value_dictionary = {}

        if train:
            ql = Q_learing(learning_rate, discount_factor, pretrain_path=pretrain_path, max_steps=max_steps)
            steps, self.value_dictionary = ql.train()

            if save_model_path is not None:
                d = {'steps': steps, 'value_dictionary': self.value_dictionary}

                with open(save_model_path, 'wb') as outfile:
                    dill.dump(d, outfile)
        elif pretrain_path is not None:
            # NOTE(security): dill.load can execute arbitrary code while
            # unpickling; only load model files from a trusted source.
            with open(pretrain_path, 'rb') as f:
                d = dill.load(f)

            self.value_dictionary = d['value_dictionary']
            print(len(self.value_dictionary))

    def make_move(self, game: Game) -> tuple[tuple[int, int], Move]:
        """Return the table's choice for the current board, or a random move.

        Returns:
            ``(from_pos, move)`` where ``from_pos`` is a tuple of ints parsed
            from the action string and ``move`` is a ``Move`` member.
        """
        current_state = CustomState(get_coordinates(game.get_board()))

        actions = None
        if current_state in self.value_dictionary:
            actions = self.value_dictionary[current_state]

        if actions:
            # Same pick as ``sorted(actions, key=actions.get)[0]`` without the sort.
            # NOTE(review): this selects the action with the LOWEST value; confirm
            # that the training code stores values where lower is better.
            chosen = min(actions, key=actions.get)

            action = chosen.split('-')
            # Keeps every digit character of e.g. "(3, 0)"; this only works
            # while each coordinate is a single digit.
            from_pos = tuple(int(c) for c in action[0] if c.isdigit())

            # Any unrecognised label falls back to BOTTOM, as before.
            move = {
                'Move.LEFT': Move.LEFT,
                'Move.RIGHT': Move.RIGHT,
                'Move.TOP': Move.TOP,
            }.get(action[1], Move.BOTTOM)
        else:
            ## Random play (unknown state, or a state with no recorded action)
            from_pos = (random.randint(0, 4), random.randint(0, 4))
            move = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])

        return from_pos, move
    
# Contestants used by the evaluation cell: the RL agent loads its saved
# table from disk, the opponent plays at random.
player1 = RLPlayer(
    pretrain_path='train_results/rl.pik',
)
player2 = RandomPlayer()

In [None]:
# Play 10 games of player1 against player2 and tally the outcomes:
# a result of 0 counts as a win, 1 as a loss, anything else as a draw.
win = lose = draw = 0
for i in range(10):
    print(i)
    g = Game()
    winner = g.play(player1, player2)
    if winner not in (0, 1):
        draw += 1
    elif winner == 0:
        win += 1
    else:
        lose += 1

print(win, lose, draw, sep='\n')

In [None]:
import dill
from strategies.rl import CustomState, print_dictionary

# Inspect the saved training artefact: the stored step count and the
# number of states in the value table.
# NOTE(security): dill.load can execute arbitrary code; only open trusted files.
with open('train_results/rl.pik', mode='rb') as f:
    my_dict = dill.load(f)

print(my_dict['steps'])
print(len(my_dict['value_dictionary']))

In [1]:
from strategies.utils import MinMaxPlayer, RandomPlayer
from game import Game
from tqdm.auto import tqdm

# Benchmark: 100 games of MinMaxPlayer (player1) against RandomPlayer (player2).
win = lose = draw = 0

player1 = MinMaxPlayer()
player2 = RandomPlayer()

for _ in tqdm(range(100)):
    game = Game()
    winner = game.play(player1, player2)
    # 0 is tallied as a win, 1 as a loss, anything else as a draw.
    if winner not in (0, 1):
        draw += 1
    elif winner == 0:
        win += 1
    else:
        lose += 1

print(win, lose, draw, sep='\n')

  0%|          | 0/100 [00:00<?, ?it/s]

100
0
0


In [None]:
from tqdm.auto import tqdm
# Ten-game evaluation with a progress bar.
# NOTE(review): Game, player1 and player2 are not defined in this cell; the
# result depends on which earlier cell bound them last.
win = lose = draw = 0
for i in tqdm(range(10)):
    print(f'Game {i}')
    g = Game()
    winner = g.play(player1, player2)
    if winner not in (0, 1):
        draw += 1
    elif winner == 0:
        win += 1
    else:
        lose += 1

print(win, lose, draw, sep='\n')

In [1]:
import numpy as np
# 5x5 grid filled with -1, except the last two cells of the bottom row (1 and 0).
# presumably -1 marks an empty cell and 0/1 the two players; TODO confirm.
array_struttura = np.full((5, 5), -1)
array_struttura[4, 3:] = (1, 0)

# Row 3, column 4 lies just above the bottom-right corner.
print(array_struttura[3, 4])

KeyboardInterrupt: 

In [1]:
import random
import dill
from game import Game, Move, Player
from strategies.rl import Q_learing, CustomState,  get_coordinates, costruisci_array,print_dictionary
from copy import deepcopy
from strategies.utils import CustomGame
class RandomPlayer(Player):
    """Baseline player that proposes a random move and logs its turn."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Return a random ``(from_pos, move)`` pair; ``game`` is not inspected."""
        first = random.randint(0, 4)
        second = random.randint(0, 4)
        slide = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        # Trace line emitted on every call, after the move has been drawn.
        print("turn random")
        return (first, second), slide


class MyPlayer(Player):
    """Placeholder player; its move selection is currently purely random."""

    def __init__(self) -> None:
        super().__init__()

    def make_move(self, game: 'Game') -> tuple[tuple[int, int], Move]:
        """Pick two coordinates in 0..4 and one of the four slide directions."""
        coordinates = tuple(random.randint(0, 4) for _ in range(2))
        slide = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        return coordinates, slide
    
class RLPlayer(Player):
    """Player that selects moves from a Q-table produced by ``Q_learing``.

    ``self.value_dictionary`` maps a ``CustomState`` to a mapping of action
    strings of the form ``"<from_pos>-<Move member>"`` to values. Before the
    lookup, the board is replaced by the state returned from
    ``CustomState.get_equivalent()``. States that are missing from the table
    (or have no recorded action) are played at random.

    Args:
        learning_rate: forwarded to ``Q_learing`` when ``train`` is true.
        discount_factor: forwarded to ``Q_learing`` when ``train`` is true.
        pretrain_path: when training, forwarded to ``Q_learing``; otherwise the
            dill file the table is loaded from.
        save_model_path: when training, file where ``steps`` and the table are dumped.
        max_steps: forwarded to ``Q_learing`` when ``train`` is true.
        train: run ``Q_learing.train()`` in the constructor instead of loading.
    """

    def __init__(self, learning_rate=0.1, discount_factor=0.7, pretrain_path=None, save_model_path=None, max_steps=None, train=False) -> None:
        super().__init__()
        # Always define the table: without it, an instance created with neither
        # ``train`` nor ``pretrain_path`` crashed with AttributeError in make_move().
        self.value_dictionary = {}

        if train:
            ql = Q_learing(learning_rate, discount_factor, pretrain_path=pretrain_path, max_steps=max_steps)
            steps, self.value_dictionary = ql.train()

            if save_model_path is not None:
                print(len(self.value_dictionary))
                d = {'steps': steps, 'value_dictionary': self.value_dictionary}

                with open(save_model_path, 'wb') as outfile:
                    dill.dump(d, outfile)
        elif pretrain_path is not None:
            # NOTE(security): dill.load can execute arbitrary code while
            # unpickling; only load model files from a trusted source.
            with open(pretrain_path, 'rb') as f:
                d = dill.load(f)

            self.value_dictionary = d['value_dictionary']
            print(len(self.value_dictionary))

    def make_move(self, _game: Game) -> tuple[tuple[int, int], Move]:
        """Return the table's choice for the (equivalent) board, or a random move.

        Side effects: prints debug traces and, when a table entry is used,
        rebinds ``_game._board`` to the wrapper's board.

        Returns:
            ``(from_pos, move)`` where ``from_pos`` is a tuple of ints parsed
            from the action string and ``move`` is a ``Move`` member.
        """
        game = CustomGame(_game)
        current_state = CustomState(get_coordinates(game.get_board()))
        # Swap the state for its equivalent form and rebuild the wrapper's board
        # from it (presumably a canonical symmetric representative; TODO confirm).
        current_state.state = current_state.get_equivalent()
        game.modify_board(costruisci_array(current_state.state))

        actions = None
        if current_state in self.value_dictionary:
            actions = self.value_dictionary[current_state]

        if actions:
            # Same pick as ``sorted(actions, key=actions.get)[0]`` without the sort.
            # NOTE(review): this selects the action with the LOWEST value; confirm
            # that the training code stores values where lower is better.
            chosen = min(actions, key=actions.get)

            action = chosen.split('-')
            print(action[0])
            # Keeps every digit character of e.g. "(3, 0)"; this only works
            # while each coordinate is a single digit.
            from_pos = tuple(int(c) for c in action[0] if c.isdigit())
            print(from_pos)

            game.print()
            # Any unrecognised label falls back to BOTTOM, as before.
            move = {
                'Move.LEFT': Move.LEFT,
                'Move.RIGHT': Move.RIGHT,
                'Move.TOP': Move.TOP,
            }.get(action[1], Move.BOTTOM)

            # Dry-run the move on a copy; the result is only reported in the trace below.
            game_ = deepcopy(game)
            ok = game_.move(from_pos=from_pos, slide=move, player_id=game.current_player_idx)
            # NOTE(review): this mutates the caller's game from inside a player so
            # that the returned move refers to the transformed board.
            _game._board = game._board
            print(move, from_pos, ok)
        else:
            ## Random play (unknown state, or a state with no recorded action)
            print("random")
            from_pos = (random.randint(0, 4), random.randint(0, 4))
            move = random.choice([Move.TOP, Move.BOTTOM, Move.LEFT, Move.RIGHT])
        print("ended")
        return from_pos, move
    
# Train a fresh RL agent for 100 steps (constructing it runs the training)
# and save the result to 'rl.pik'; the opponent plays at random.
player1 = RLPlayer(
    save_model_path='rl.pik',
    max_steps=100,
    train=True,
)
player2 = RandomPlayer()


0it [00:00, ?it/s]

state(x=[(0, 0)], o=[]):
  (3, 0)-Move.RIGHT: 0.0
  (4, 1)-Move.TOP: 0.0
  (2, 4)-Move.TOP: 0.0
  (1, 4)-Move.RIGHT: 0.0
  (3, 0)-Move.LEFT: 0.0
  (4, 3)-Move.BOTTOM: 0.0
  (2, 0)-Move.RIGHT: 0.0
  (1, 0)-Move.BOTTOM: 0.0
  (4, 1)-Move.BOTTOM: 0.0
  (0, 1)-Move.RIGHT: 0.0
  (1, 4)-Move.TOP: 0.0
  (1, 0)-Move.RIGHT: 0.0
  (4, 2)-Move.LEFT: 0.0
  (0, 3)-Move.BOTTOM: 0.0
  (0, 2)-Move.RIGHT: 0.0
  (2, 4)-Move.LEFT: 0.0
  (2, 0)-Move.BOTTOM: 0.0
  (4, 0)-Move.BOTTOM: 0.0
  (4, 3)-Move.LEFT: 0.0
  (0, 1)-Move.BOTTOM: 0.0
  (0, 4)-Move.TOP: 0.0
  (3, 4)-Move.TOP: 0.0
  (4, 4)-Move.LEFT: 0.0
  (2, 4)-Move.RIGHT: 0.0
  (0, 3)-Move.RIGHT: 0.0
  (4, 4)-Move.TOP: 0.0
  (2, 0)-Move.LEFT: 0.0
  (4, 2)-Move.TOP: 0.0
  (3, 4)-Move.RIGHT: 0.0
  (0, 4)-Move.RIGHT: 0.0
  (3, 0)-Move.BOTTOM: 0.0
  (4, 3)-Move.TOP: 0.0
  (1, 4)-Move.LEFT: 0.0
  (0, 2)-Move.TOP: 0.0
  (1, 0)-Move.LEFT: 0.0
  (3, 4)-Move.LEFT: 0.0
state(x=[], o=[]):
  (0, 0)-Move.BOTTOM: 0.0
  (4, 1)-Move.TOP: 0.0
  (1, 4)-Move.LEFT: 0.0
  

### GESTIRE RL PLAYER COME PLAYER 2