In [26]:
# Copyright © 2021 Giovanni Squillero <squillero@polito.it>
# Free for personal or classroom use; see 'LICENCE.md' for details.
# https://github.com/squillero/computational-intelligence

import argparse
import logging
from itertools import permutations, product
import numpy as np
import coloredlogs  # I like my log to be colorful
from tqdm import tqdm  # Coolest progress bar




def display(state, legend=None, *, coordinates=False):
    if not legend:
        legend = list()
    legend += [''] * 3
    x, o = state
    for r, c in product(range(3), repeat=2):
        if TICTACTOE_MAP[r, c] in x:
            print("X", end=" ")
        elif TICTACTOE_MAP[r, c] in o:
            print("O", end=" ")
        elif coordinates:
            print(f"{TICTACTOE_MAP[r, c]}", end=" ")
        else:
            print(".", end=" ")
        if c == 2:
            print(f" {legend[r]}")


def winning_position(cells):
    return any(sum(h) == 12 for h in permutations(cells, 3))


def eval_static(state) -> int:
    """Statically evaluate a board: 1 if agent won, -1 if it lost"""
    if winning_position(state[0]):
        return 1
    elif winning_position(state[1]):
        return -1
    else:
        return 0


def next_state(state, action: int):
    """Returns the next state when agent does `action` in `state`"""
    me, opponent = state
    assert len(me) <= len(opponent)
    return frozenset(set(me) | {action}), opponent


def valid_actions(state, agent: int = 0):
    """Returns a list of valid actions"""
    if len(state[agent]) > len(state[1 - agent]):
        return list()
    else:
        return list(set(range(9)) - state[0] - state[1])


def best_action(Q: dict, state):
    if not valid_actions(state):
        return (None, eval_static(state))
    else:
        return max(((a, Q[(state, a)]) for a in valid_actions(state)), key=lambda x: x[1])


def describe_policy(Q, V):
    non_zero = [q for q in Q.items() if q[1] != 0]
    learned = [q for q in Q.items() if q[1] != 0 and q[1] != 1 and q[1] != -1]
    print(f"Found {len(non_zero):,} non zero s/a over {len(Q):,}; {len(learned):,} learned.")
    non_zero = [v for v in V.items() if v[1] != 0]
    learned = [v for v in V.items() if v[1] != 0 and v[1] != 1 and v[1] != -1]
    print(f"Found {len(non_zero):,} non zero values over {len(V):,}; {len(learned):,} learned.")



ModuleNotFoundError: No module named 'coloredlogs'

In [None]:
from itertools import combinations
from collections import namedtuple
import numpy as np

Position = namedtuple('Position', ['x', 'o'])


## Enivironment Setup

In [31]:
class TicTacToe:
    def __init__(self):
        self.board = [2, 7, 6,
                      9, 5, 1, 
                      4, 3, 8]
        self.players = ["X", "O"]
        self.current_state = None
        self.current_player = None
        self.winner = None
        self.game_over = False

    def reset(self):
        self.board =[2, 7, 6,
                      9, 5, 1, 
                      4, 3, 8]
        self.current_player = None
        self.current_state = None
        self.winner = None
        self.game_over = False

    def available_moves(self):
        return set(range(1, 9 + 1)) - self.current_state.x - self.current_state.o

    def make_move(self, move):
        # if self.board[move[0]][move[1]] != 0:
        #     return False
        # self.board[move[0]][move[1]] = self.players.index(self.current_player) + 1

        if self.current_player == self.players[0]:
            self.current_state.x.add(move)
        else:
            self.current_state.o.add(move)
        self.win()
        self.switch_player()
        return True

    def switch_player(self):
        if self.current_player == self.players[0]:
            self.current_player = self.players[1]
        else:
            self.current_player = self.players[0]

    def win(self):
        return any(sum(c) == 15 for c in combinations(self.current_state, 3))

    def print_board(self):
        print("-------------")
        for r in range(3):
            print("-------------")
            for c in range(3):
              
                i = r * 3 + c
                if self.board[i] in self.current_state.x:
                    print("X", end="" " ")
                    
                elif self.board[i] in self.current_state.o:
                    print("O", end="" " ")
                    
                else:
                    print(".", end="" " ")
                   
                print("|", end="" " ")
            print()
        
        print("-------------")

In [32]:
game = TicTacToe()
game.current_state = Position({1,2,4}, {3,5})
game.print_board()

-------------
-------------
| X | | . | | . | 
-------------
| . | | O | | X | 
-------------
| X | | O | | . | 
-------------
