In [121]:
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice, uniform, randint
from copy import deepcopy
from IPython.display import clear_output
from tqdm.auto import tqdm
import time
import logging
import matplotlib.pyplot as plt 


In [122]:
BOARD_SIZE = 3

In [123]:
class TicTacToeEnvironment:
    def reset(self):
        self.board = [["-" for _ in range(BOARD_SIZE)] for _ in range(BOARD_SIZE)]
        self.AI = 'X'
        self.OPPONENT = 'O'
        self.current_player = self.OPPONENT
      
        return self.board
    

    def check_winner2(self) -> int:
        '''Check the winner. If the game is ended it returns the player symbol of the winner or "tie", otherwise None'''
        winner = None

        # for each row
        for x in range(BOARD_SIZE):
            # if a player has completed an entire row
            if self.board[x][0] != '-' and all(self.board[x][y] == self.board[x][0] for y in range(BOARD_SIZE)):
                winner = self.board[x][0]

        # for each column
        for y in range(BOARD_SIZE):
            # if a player has completed an entire column
            if self.board[0][y] != '-' and all(self.board[x][y] == self.board[0][y] for x in range(BOARD_SIZE)):
                winner = self.board[0][y]

        # if a player has completed the principal diagonal
        if self.board[0][0] != '-' and all(self.board[x][x] == self.board[0][0] for x in range(BOARD_SIZE)):
            winner = self.board[0][0]

        # if a player has completed the secondary diagonal
        if self.board[0][-1] != '-' and all(self.board[x][-(x + 1)] == self.board[0][-1] for x in range(BOARD_SIZE)):
            winner = self.board[0][-1]

            
        open_spots = 0
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if self.board[i][j] == "-":
                    open_spots += 1

        if winner is None and open_spots == 0:
            return 'tie'
        else:
            return winner

        
    def check_winner(self):
        winner = None

        # Orizzontale
        for i in range(BOARD_SIZE):
            if self.board[i][0] == self.board[i][1] == self.board[i][2] and self.board[i][0] is not None:
                winner = self.board[i][0]

        # Verticale
        for i in range(BOARD_SIZE):
            if self.board[0][i] == self.board[1][i] == self.board[2][i] and self.board[0][i] is not None:
                winner = self.board[0][i]

        # Diagonale
        if self.board[0][0] == self.board[1][1] == self.board[2][2] and self.board[0][0] is not None:
            winner = self.board[0][0]
        if self.board[2][0] == self.board[1][1] == self.board[0][2] and self.board[2][0] is not None:
            winner = self.board[2][0]

        open_spots = 0
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if self.board[i][j] == "-":
                    open_spots += 1

        if winner is None and open_spots == 0:
            return 'tie'
        else:
            return winner

    def set_curren_player(self):
        self.current_player = self.AI if self.current_player == self.OPPONENT else self.OPPONENT

    def __init__(self):
        self.reset()   


    def render(self):
        for x in self.board:
            print(x)



## MINMAX algorithm

In [124]:

def best_move(board, game):
    best_score = float('-inf')
    move = "-"
    #print('board: ', board)
    for i in range(BOARD_SIZE):
        for j in range(BOARD_SIZE):
            if board[i][j] == "-":
                board[i][j] = game.AI
                score = minimax(board, 0, False, game)
                board[i][j] = "-"
                if score > best_score:
                    best_score = score
                    move = (i, j)

    board[move[0]][move[1]] = game.AI
   

def minimax(board, depth, is_maximizing, game):
    result = game.check_winner()

    if result == game.AI:
        return 1
    elif result == game.OPPONENT:
        return -1
    elif result == 'tie':
        return 0

    if is_maximizing:
        best_score = float('-inf')
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if board[i][j] == "-":
                    board[i][j] = game.AI
                    score = minimax(board, depth + 1, False, game)
                    board[i][j] = "-"
                    best_score = max(score, best_score)
        return best_score
    else:
        best_score = float('inf')
        for i in range(BOARD_SIZE):
            for j in range(BOARD_SIZE):
                if board[i][j] == "-":
                    board[i][j] = game.OPPONENT
                    score = minimax(board, depth + 1, True, game)
                    board[i][j] = "-"
                    best_score = min(score, best_score)
        return best_score


## Gameplay

In [125]:
env = TicTacToeEnvironment()
x_wins = 0
o_wins = 0
draws = 0
matches = 1000

for episode in tqdm(range(matches)):
    state = env.reset()
   
    
    #print("*****Episode ", episode+1, "******\n\n")
    
    while True:
      
        #print(f"\n*********** Player: {env.current_player} turn!********\n")

    
        if env.current_player == env.OPPONENT:
            move = ()
            while True:
                x = randint(0,2)
                y = randint(0,2)
                move = (x,y)
                if env.board[x][y] == '-':
                    break
        
            if state[move[0]][move[1]] == '-':
                state[move[0]][move[1]] = env.OPPONENT


            env.render()  #remove the '#' for seeing the move
            env.set_curren_player()
            
            result = env.check_winner2()
            if result == "O":
                o_wins += 1
                #print('\tO wins!')
                break
            elif result == "tie":
                draws += 1
                break
          
        else: #AI
            best_move(state, env)

            env.render() #remove the '#' for seeing the move
            env.set_curren_player()

            
            result = env.check_winner2()
            if result == "X":
                x_wins += 1
                #print('\tX wins!')
                break
            elif result == "tie":
                draws += 1
                break


print("Wins: ",x_wins*100/float(matches), "\tDraws: ", draws*100/float(matches), "\tLosses:", o_wins*100/float(matches))    

        

  0%|          | 0/10 [00:00<?, ?it/s]

*****Episode  1 ******



*********** Player: O turn!********

['-', 'O', '-']
['-', '-', '-']
['-', '-', '-']

*********** Player: X turn!********



['X', 'O', '-']
['-', '-', '-']
['-', '-', '-']

*********** Player: O turn!********

['X', 'O', '-']
['-', '-', 'O']
['-', '-', '-']

*********** Player: X turn!********

['X', 'O', '-']
['-', '-', 'O']
['X', '-', '-']

*********** Player: O turn!********

['X', 'O', '-']
['O', '-', 'O']
['X', '-', '-']

*********** Player: X turn!********

['X', 'O', '-']
['O', 'X', 'O']
['X', '-', '-']

*********** Player: O turn!********

['X', 'O', '-']
['O', 'X', 'O']
['X', '-', 'O']

*********** Player: X turn!********

['X', 'O', 'X']
['O', 'X', 'O']
['X', '-', 'O']
	X wins!
*****Episode  2 ******



*********** Player: O turn!********

['-', '-', '-']
['-', '-', '-']
['O', '-', '-']

*********** Player: X turn!********

['-', '-', '-']
['-', 'X', '-']
['O', '-', '-']

*********** Player: O turn!********

['-', '-', 'O']
['-', 'X', '-']
['O', '-', '-']

*********** Player: X turn!********

['-', 'X', 'O']
['-', 'X', '-']
['O', '-', '-']

*********** Player: O turn!********

['-', 'X', 'O']
['-'