In [68]:
import numpy as np

In [None]:
'''
Lớp game có vai trò:
+ Không gian hành động có thể thực hiện từ trạng thái hiện tại
+ Kiểm tra trạng thái kết thúc
+ Nhận phần thưởng khi thực hiện hành động
Lớp player:
+ Từ không gian hành động lớp game trả về -> choose action của riêng player
'''

'\nLớp game có vai trò:\n+ Không gian hành động có thể thực hiện từ trạng thái hiện tại\n+ Kiểm tra trạng thái kết thúc\n+ Nhận phần thưởng khi thực hiện hành động\n'

In [None]:
from abc import ABC, abstractmethod
class Game(ABC):
    '''
    Abstract base class for a game environment.

    It declares four abstract hooks that every concrete game has to
    implement; no behaviour is provided at this level.
    '''

    def __init__(self) -> None:
        '''Nothing to set up in the base class.'''

    @abstractmethod
    def get_next_actions(self, state):
        '''Hook taking a `state`; the return value is defined by the subclass.'''

    @abstractmethod
    def is_terminal_state(self, state):
        '''Hook taking a `state`; the return value is defined by the subclass.'''

    @abstractmethod
    def get_reward(self, state, action):
        '''Hook taking a `state` and an `action`; the return value is defined by the subclass.'''

    @abstractmethod
    def get_next_state(self, state, action):
        '''Hook taking a `state` and an `action`; the return value is defined by the subclass.'''

In [None]:
from scipy.signal import correlate2d
class TicTacToe(Game):
    '''
    Tic-tac-toe style environment on a square board.

    Board encoding used by every method: 0 = empty cell, 1 = the agent's
    mark, -1 = the opponent's mark. A player wins by owning a complete line
    (row, column or diagonal) of length min(size_board, 5); a board with no
    empty cell and no such line is a draw.
    '''

    def _get_kernel(self, state: np.ndarray):
        '''
        Create the line-detection kernels for the board in `state`.
        The kernel side is min(size_board, 5). Every kernel is a square array
        of zeros in which exactly one row, one column or one diagonal is 1.
        input:
            state: np.ndarray, square 2-D board
        output:
            list of np.ndarray: row kernels, column kernels, main diagonal,
            anti-diagonal (in that order)
        '''
        size_board, _ = state.shape
        kernel_size = min(size_board, 5)

        # arrays containing kernels
        rows_1 = []
        cols_1 = []
        for i in range(kernel_size):
            row_1 = np.zeros((kernel_size, kernel_size))
            row_1[i, :] = 1
            rows_1.append(row_1)

            col_1 = np.zeros((kernel_size, kernel_size))
            col_1[:, i] = 1
            cols_1.append(col_1)

        first_diagonal = np.zeros((kernel_size, kernel_size))
        np.fill_diagonal(first_diagonal, 1)

        second_diagonal = np.fliplr(first_diagonal)

        return [*rows_1, *cols_1, first_diagonal, second_diagonal]

    def _get_winner(self, state: np.ndarray):
        '''
        Slide every kernel over the board and look for a complete line.
        input:
            state: np.ndarray, square 2-D board
        output:
            1 if a line is fully owned by mark 1, -1 if fully owned by
            mark -1, 0 if no complete line exists
        '''
        size_board, _ = state.shape
        # Cells are in {-1, 0, 1}, so a window sums to +/-result only when
        # every cell on the kernel's line carries the same mark.
        result = min(size_board, 5)

        for kernel in self._get_kernel(state):
            conv = correlate2d(state, kernel, mode='valid')
            if result in conv:
                return 1
            if -result in conv:
                return -1
        return 0

    def get_next_actions(self, state: np.ndarray):
        '''
        Return every playable position as an index into the flattened board:
        0 -> size_board**2 - 1
        input:
            state: np.ndarray
        output:
            actions: np.ndarray of flat indices of all empty cells,
            or None when `state` is terminal
        '''
        if self.is_terminal_state(state):
            return None

        # flatnonzero yields ALL empty cells in row-major order; the previous
        # `argwhere(...)[0]` kept only the first one.
        return np.flatnonzero(state == 0)

    def is_terminal_state(self, state: np.ndarray):
        '''
        A state is terminal when either player owns a complete line or when
        the board has no empty cell left.
        input:
            state: np.ndarray, square 2-D board
        output:
            bool
        '''
        if self._get_winner(state) != 0:
            return True
        return 0 not in state

    def get_next_state(self, state: np.ndarray, action: int):
        '''
        Apply the agent's move, then let the environment answer with a
        uniformly random opponent move.
        input:
            state: np.ndarray, square 2-D board (not modified)
            action: int, flat index of the cell the agent marks with 1
        output:
            np.ndarray with the same shape as `state`, or None when `state`
            is already terminal
        '''
        # Check the end status, if true, no action will be taken
        if self.is_terminal_state(state):
            return None

        new_state = state.flatten()  # flatten() copies, the caller's board is untouched
        new_state[action] = 1
        # Back to board shape: is_terminal_state / get_next_actions need 2-D input.
        new_state = new_state.reshape(state.shape)

        # check the end state, if true, the environment will not issue random actions
        if self.is_terminal_state(new_state):
            return new_state

        next_actions_opponent = self.get_next_actions(new_state)

        # Explicit test: the truth value of a multi-element ndarray is ambiguous.
        if next_actions_opponent is not None and next_actions_opponent.size > 0:
            random_action = np.random.choice(next_actions_opponent)
            new_state.flat[random_action] = -1

        return new_state

    def get_reward(self, state: np.ndarray, action: int):
        '''
        Reward for the agent marking cell `action` on `state`. Only the
        agent's own move is applied; no opponent reply is simulated here.
        input:
            state: np.ndarray, square 2-D board (not modified)
            action: int, flat index of the cell the agent marks with 1
        output:
            1 if the resulting board has a complete line of 1s,
            -1 if it has a complete line of -1s,
            0 otherwise (draw or game still running)
        '''
        new_state = state.flatten()
        new_state[action] = 1
        new_state = new_state.reshape(state.shape)

        # The winner must be read from the board AFTER the move.
        return self._get_winner(new_state)

In [None]:
from typing import Literal
from flask import Flask, request, render_template, redirect, url_for, flash

# Flask application object; the `@app.route(...)` decorators further down register their views on it.
# NOTE(review): `flash()` is called in this notebook, and Flask's flashing relies on the session,
# which presumably needs `app.secret_key` to be configured — no key is set here; confirm.
app = Flask(__name__)
class Player(ABC):
    '''
    Abstract base class for a player.

    It declares a single abstract hook, `choose_action`, that every concrete
    player has to implement.
    '''

    def __init__(self) -> None:
        '''Nothing to set up in the base class.'''

    @abstractmethod
    def choose_action(self, state):
        '''Hook taking a `state`; the return value is defined by the subclass.'''
class RandomPlayerTicTacToe(Player):
    '''
    Player that puts its mark on a uniformly random empty cell.

    'X' writes 1 on the board, any other name writes -1.
    '''
    def __init__(self, name: Literal['X', 'O']) -> None:
        self.name = name
        super().__init__()

    def choose_action(self, state: np.ndarray):
        '''
        Pick a random playable cell and return the board after the move.
        input:
            state: np.ndarray, square 2-D board (not modified)
        output:
            np.ndarray board with this player's mark added, or None when
            there is nothing to play (the game reports `state` as terminal)
        '''
        actions = TicTacToe().get_next_actions(state)

        # get_next_actions returns None for a finished board; feeding that
        # (or an empty array) to np.random.choice would raise.
        if actions is None or len(actions) == 0:
            return None

        new_state = state.flatten()  # copy, so the caller's board stays intact
        action = np.random.choice(actions)
        new_state[action] = 1 if self.name == 'X' else -1
        return new_state.reshape(state.shape[0], -1)
class HumanPlayerTicTacToe(Player):
    '''
    Player whose move comes from the `row` / `col` fields of a submitted form.

    'X' writes 1 on the board, any other name writes -1.
    '''
    def __init__(self, name: Literal['X', 'O']) -> None:
        super().__init__()
        self.name = name

    # NOTE(review): `app.route` is applied to a plain function inside the class
    # body, so Flask would invoke it without `self` and `state` when the URL is
    # hit. The decorator is kept so the registered endpoint does not disappear,
    # but a module-level view that owns the player and the board and delegates
    # to this method is needed for the route to work — confirm intended design.
    @app.route('/human-move', methods=['GET','POST'])
    def choose_action(self, state: np.ndarray):
        '''
        Read the requested cell from the POSTed form and return the board
        after the move.
        input:
            state: np.ndarray, square 2-D board (not modified)
        output:
            POST, valid cell   -> np.ndarray board with this player's mark added
            POST, invalid cell -> None (an "Invalid location!" message is flashed)
            any other method   -> redirect to the `index` view
        '''
        if request.method == 'POST':
            n_rows, n_cols = state.shape
            actions = TicTacToe().get_next_actions(state)

            try:
                row = int(request.form.get("row"))
                col = int(request.form.get("col"))
            except (TypeError, ValueError):
                # Missing or non-numeric form fields.
                flash("Invalid location!", "message")
                return None

            # assumes the form sends 0-based row/col — TODO confirm against home.html
            in_bounds = 0 <= row < n_rows and 0 <= col < n_cols
            # Row-major flat index, matching the indices from get_next_actions.
            action = row * n_cols + col
            # `actions` is None when the game is already over.
            if actions is None or not in_bounds or action not in actions:
                flash("Invalid location!", "message")
                return None

            new_state = state.flatten()  # copy, so the caller's board stays intact
            new_state[action] = 1 if self.name == 'X' else -1
            new_state = new_state.reshape(state.shape[0], -1)
            return new_state

        return redirect(url_for('index'))

In [None]:
@app.route('/')
def index():
    '''View for the root URL: renders the `home.html` template.'''
    home_page = render_template('home.html')
    return home_page