<a href="https://colab.research.google.com/github/NikolaZubic/AppliedGameTheoryHomeworkSolutions/blob/main/domaci1_dodatni.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PRVI DODATNI DOMAĆI ZADATAK iz predmeta "Primenjena teorija igara" (Applied Game Theory)

Razvoj bota za igranje igre Iks-Oks (Tic Tac Toe) koristeći "MiniMax" algoritam.

# Potrebni import-i

In [None]:
import gym
import numpy as np
from gym import spaces

# Definisanje Iks-Oks okruženja koristeći "Open AI Gym" toolkit

In [None]:
class TicTacToeEnvironment(gym.Env):
    """
    Tic Tac Toe on a 3 x 3 board, wrapped as an "Open AI Gym" environment.

    Every cell of the board holds 0 (free), 1 (player 1, rendered as "X") or -1 (player 2, rendered as "O").
    """

    # Because of human-friendly output
    metadata = {'render.modes': ['human']}

    def __init__(self, player_1, player_2):
        """
        Board is predefined to a 3 x 3 grid.
        All per-game state (board, game_over flag, player on move, board string) is initialised by reset().

        :param player_1: First player
        :param player_2: Second player
        """
        self.observation_space = spaces.Discrete(3 * 3)
        self.action_space = spaces.Discrete(9)
        self.player_1 = player_1
        self.player_2 = player_2

        # Creates the empty board, clears the game_over flag and lets player_1 play first.
        self.reset()

    def reset(self):
        """
        Resets the environment after one game.

        :return: None
        """
        self.board = np.zeros((3, 3))  # set board to zeros
        self.board_str = None  # set board string representation to null
        self.game_over = False
        self.current_player = 1  # player 1 always moves first

    def get_board(self):
        """
        Getter for the current board.

        :return: the board array itself (not a copy), so changes made by the caller are visible here
        """
        return self.board

    def get_board_str(self):
        """
        Synchronize board string representation with the current board state.

        :return: string form of the board flattened to 9 values
        """
        self.board_str = str(self.board.reshape(3 * 3))
        return self.board_str

    def get_free_positions(self):
        """
        Return positions on the board that are free / not occupied.

        :return: list of (row, column) tuples, in row-major order
        """
        return [(i, j) for i in range(3) for j in range(3) if self.board[i, j] == 0]

    def update_state(self, new_position):
        """
        Update the current state of the board. First player puts '1' on the board, second player puts '-1' on the board.
        The position is written without checking whether it is free.

        :param new_position: from set { (0, 0), (0, 1), (0, 2), (1, 0,), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2) }
        :return: None
        """
        self.board[new_position] = self.current_player

        # If current player puts 1 ("X") on the board, next move is player 2 who puts -1 "O" on the board.
        self.current_player = -1 if self.current_player == 1 else 1

    def check_game_status(self):
        """
        Check if game has finished and update the game_over flag accordingly.

        :return: 1 if player 1 has won,
                 -1 if player 2 has won,
                 0 if draw,
                 None if game hasn't finished.
        """
        # Sums of all eight lines: rows first, then columns, then the two diagonals.
        line_sums = [sum(self.board[i, :]) for i in range(3)]
        line_sums += [sum(self.board[:, i]) for i in range(3)]
        line_sums.append(sum(self.board[i, i] for i in range(3)))
        line_sums.append(sum(self.board[i, 3 - i - 1] for i in range(3)))

        # A line sums to 3 only if it holds three 1s and to -3 only if it holds three -1s.
        # Every line is checked against its own sum so a diagonal of "O" is credited to player 2.
        for line_sum in line_sums:
            if line_sum == 3:
                self.game_over = True
                return 1
            if line_sum == -3:
                self.game_over = True
                return -1

        # DRAW
        if len(self.get_free_positions()) == 0:
            self.game_over = True
            return 0

        self.game_over = False

        return None

    def step(self, action):
        """
        Performs one action for the player currently on move.

        :param action: from set { (0, 0), (0, 1), (0, 2), (1, 0,), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2) }
        :return: current board state,
                 reward (1 / -1 / 0 when the game ends with this move, None while the game is still running,
                 0 if the game was already over before the call),
                 boolean indicating whether the game is over,
                 dictionary with information about who won the game if it ended with this move, otherwise None
        """
        if self.game_over:
            return self.board, 0, True, None

        self.update_state(action)
        current_game_status = self.check_game_status()

        if current_game_status is None:
            return self.board, None, self.game_over, None

        if current_game_status == 1:
            reward = 1
            info = {"Result": "Player 1 won the game."}
        elif current_game_status == -1:
            reward = -1
            info = {"Result": "Player 2 won the game."}
        else:
            # DRAW
            reward = 0
            info = {"Result": "Draw."}
        return self.board, reward, self.game_over, info

    def render(self, mode='human'):
        """
        Print the board to standard output, "X" for 1, "O" for -1 and a blank for a free cell.

        :param mode: accepted so the method can be called the way gym calls render; the value is not used
        :return: None
        """
        tokens = {1: 'X', -1: 'O', 0: ' '}
        for i in range(0, 3):
            print('-------------')
            out = '| '
            for j in range(0, 3):
                out += tokens.get(self.board[i, j], '') + ' | '
            print(out)
        print('-------------')


# MiniMax algoritam

In [None]:
# Index triples of the eight lines (3 rows, 3 columns, 2 diagonals) that win the game.
_WINNING_LINES = (
    ((0, 0), (0, 1), (0, 2)),
    ((1, 0), (1, 1), (1, 2)),
    ((2, 0), (2, 1), (2, 2)),
    ((0, 0), (1, 0), (2, 0)),
    ((0, 1), (1, 1), (2, 1)),
    ((0, 2), (1, 2), (2, 2)),
    ((0, 0), (1, 1), (2, 2)),
    ((0, 2), (1, 1), (2, 0)),
)


def _winner(board):
    """Return the symbol that fills a complete line on the board, or 0 if no line is complete."""
    for (r0, c0), (r1, c1), (r2, c2) in _WINNING_LINES:
        first = board[r0][c0]
        if first != 0 and first == board[r1][c1] and first == board[r2][c2]:
            return first
    return 0


def minimax(current_board, depth, is_maximizing, computer_symbol, player_simbol):
    """
    Evaluate a board position with the MiniMax algorithm.

    The board is modified while searching, but every trial move is undone, so it is unchanged on return.

    :param current_board: 3 x 3 board indexable as board[i][j], with 0 marking a free cell
    :param depth: number of moves already made from the position the search started from
    :param is_maximizing: True if the computer is on move, False if the opponent is on move
    :param computer_symbol: symbol of the player whose result is maximized
    :param player_simbol: symbol of the opponent
    :return: 10 - depth if the computer wins, depth - 10 if the opponent wins, 0 for a draw
             (with best play from both sides)
    """
    # Terminal positions have to be scored before expanding any moves; without this the search
    # has no information about who wins. Subtracting the depth prefers quick wins and slow losses.
    winner = _winner(current_board)
    if winner == computer_symbol:
        return 10 - depth
    if winner == player_simbol:
        return depth - 10

    free_cells = [(i, j) for i in range(3) for j in range(3) if current_board[i][j] == 0]
    if not free_cells:
        return 0  # full board without a complete line: draw

    if is_maximizing:
        value_max = - np.inf
        for i, j in free_cells:
            current_board[i][j] = computer_symbol
            val = minimax(current_board, depth + 1, False, computer_symbol, player_simbol)
            current_board[i][j] = 0  # undo
            value_max = max(val, value_max)
        return value_max

    value_min = np.inf
    for i, j in free_cells:
        current_board[i][j] = player_simbol
        val = minimax(current_board, depth + 1, True, computer_symbol, player_simbol)
        current_board[i][j] = 0  # undo
        value_min = min(val, value_min)
    return value_min


# Definisanje MiniMax agenta

In [None]:
class MiniMaxAgent(object):
    """Agent that picks its move by scoring every free cell with minimax()."""

    @staticmethod
    def get_max_action(current_board, symbol):
        """
        Choose best action.

        The board is modified while the moves are tried, but every trial move is undone before returning.

        :param current_board: current environment board
        :param symbol: current player symbol (1 = "X" = PC and -1 = "O" = HUMAN)
        :return: action, for example (1, 1) which is in the center of 3x3 grid;
                 None only if the board has no free cell
        """
        # The opponent symbol does not depend on the cell being tried, so compute it once.
        player_simbol = -1 if symbol == 1 else 1

        value_max = - np.inf
        action = None

        for i in range(3):
            for j in range(3):
                if current_board[i][j] != 0:
                    continue

                current_board[i][j] = symbol
                val = minimax(current_board=current_board, depth=1, is_maximizing=False, computer_symbol=symbol,
                              player_simbol=player_simbol)
                current_board[i][j] = 0  # undo

                # Accept the first free cell unconditionally: if every move were scored -inf,
                # a strict comparison alone would leave the action as None although legal moves exist.
                if action is None or val > value_max:
                    value_max = val
                    action = (i, j)

        return action


# Definisanje igrača, unos sa tastature

In [None]:
class HumanPlayer(object):
    """Player whose moves are read from the keyboard."""

    @staticmethod
    def act(positions):
        """
        Ask the user for a move until a valid one is entered.

        The move is typed as two 1-based integers separated by a comma (for example "2,3") and is
        converted to a 0-based tuple. Input that cannot be parsed is treated like any other invalid move.

        :param positions: collection of allowed 0-based position tuples
        :return: the chosen position, which is guaranteed to be in positions
        """
        while True:
            user_input = input("['O' on move] x,y: ")
            try:
                x, y = user_input.split(",")
                pos = (int(x) - 1, int(y) - 1)
            except ValueError:
                # Raised for a wrong number of comma-separated fields or for non-integer fields.
                print("Invalid move. Try again.")
                continue

            if pos in positions:
                return pos
            print("Invalid move. Try again.")

# Pokretanje igre

In [None]:
def play_game():
    """
    Play one console game between the MiniMax agent (player 1) and a human (player 2).

    The board is printed before the first move and after every move. When the game ends,
    the result message supplied by the environment is printed.
    """
    computer = MiniMaxAgent()
    human = HumanPlayer()

    environment = TicTacToeEnvironment(computer, human)
    environment.reset()
    environment.render()

    finished = False
    while not finished:
        # One round: the computer moves first, then the human.
        for mover in (computer, human):
            if mover is computer:
                action = mover.get_max_action(environment.get_board(), environment.current_player)
            else:
                action = mover.act(environment.get_free_positions())

            _, _, finished, information = environment.step(action)
            environment.render()

            if finished:
                # Stop in the middle of the round so the other player is not asked to move.
                print(information['Result'])
                break

# Glavni program

In [None]:
# Start one interactive game; it blocks on keyboard input whenever the human player is on move.
play_game()

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
-------------
| X |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
['O' on move] x,y: 1,2
-------------
| X | O |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
-------------
| X | O | X | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
['O' on move] x,y: 2,1
-------------
| X | O | X | 
-------------
| O |   |   | 
-------------
|   |   |   | 
-------------
-------------
| X | O | X | 
-------------
| O | X |   | 
-------------
|   |   |   | 
-------------
['O' on move] x,y: 3,1
-------------
| X | O | X | 
-------------
| O | X |   | 
-------------
| O |   |   | 
-------------
-------------
| X | O | X | 
-------------
| O | X | X | 
-------------
| O |   |   | 
-------------
['O' on move] x,y: 3,3
-------------
| X | O | X | 
-------------
| O | X | X | 
-------------
| O |   | O | 
-------------