In [None]:
import random
import numpy as np
import tqdm


In [None]:
class tic_tac_toe_model():
    def __init__(self, n):
        self.n = n
        self.matriz = np.full((n,n),0,dtype=int)
    
    def reset_matrix(self):
        self.matriz = np.full((self.n,self.n),0,dtype=int)

    def print_game(self):
        matriz = self.matriz
        substituicoes = {1: "X", 2: "O", 0:' '}
        for linha in matriz:
            linha_formatada = [str(substituicoes.get(valor, valor)) for valor in linha]
            print(" | ".join(linha_formatada))
            print("-" * 9)
    

    def number_ij(self, number):
        i,j = np.unravel_index(number, self.matriz.shape)
        return i,j
    
    
    def get_avaible_moves(self):
        avaible_moves = np.ravel_multi_index(np.where(self.matriz == 0), self.matriz.shape)
        return list(avaible_moves)

    def get_random_move(self, piece):
        possible_move_i, possible_move_j = np.where(self.matriz == 0)
        if possible_move_j.shape[0] > 0:  # Verifica se há movimentos possíveis
            index = random.randint(0, possible_move_j.shape[0] - 1)  # Correção aqui
            self.move(possible_move_i[index], possible_move_j[index], piece)



    def move(self,index_i,index_j,piece):
        if self.matriz[index_i][index_j] == 0:
            self.matriz[index_i][index_j] = piece
        
    
    def reward_piece(self,piece):
        w = self.check_win()
        if w != 4 :
            if w!=3:
                if w == piece:
                    return 1
                else:
                    return -1
            
        return 0.1
          
    def check_win(self):
        state = False
        win_piece = -1
        value_counts_diagonal = np.unique(self.matriz.diagonal())
        value_counts_diagonal2  = np.unique(np.fliplr(self.matriz).diagonal())
        if value_counts_diagonal.shape[0] == 1 and value_counts_diagonal[0] !=0:
            state=True     
            win_piece = value_counts_diagonal[0] 
            return win_piece          
        if value_counts_diagonal2.shape[0] == 1 and value_counts_diagonal2[0] !=0:
            state=True    
            win_piece = value_counts_diagonal2[0]   
            return win_piece         

        for i in range(0,self.n):
            value_counts_linha = np.unique(self.matriz[i,:])
            value_counts_coluna = np.unique(self.matriz[:,i])
            
            if value_counts_linha.shape[0] == 1 and value_counts_linha[0] != 0 :
                state=True
                win_piece = value_counts_linha[0]
                break
            if value_counts_coluna.shape[0] == 1 and value_counts_coluna[0] != 0:
                state=True
                win_piece = value_counts_coluna[0]
                break
            
        velha = np.where(self.matriz == 0)
        
        if state:
            return win_piece
        if velha[0].shape[0] == 0: 
            return 3
        else:
            return 4      

In [None]:
class QlearningAgent():
    def __init__(self, alpha, epsilon, discount_factor):
        self.q_table = {}
        self.alpha = alpha
        self.epsilon = epsilon
        self.discount_factor = discount_factor


    def print_q_values(self):
        for key in self.q_table.keys():
            print(f"Key {key} Q-value{self.q_table[key]}")

            
    def get_q_value(self, state, action, piece):
        state_tuple = tuple(state.flatten())
        if (state_tuple, action,piece) not in self.q_table:
            self.q_table[(state_tuple, action, piece)] = 0.0

        return self.q_table[(state_tuple, action, piece)]

    def choose_move(self, state, available_moves, piece):
        q_values = []
        for action in available_moves:
            q_values.append(self.get_q_value(state, action, piece))
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(available_moves)
        else:
            max_q_value = max(q_values)
            if q_values.count(max_q_value) > 1:
                best_moves = [i for i in range(len(available_moves)) if q_values[i] == max_q_value]
                i = random.choice(best_moves)
            else:
                i = q_values.index(max_q_value)
            return available_moves[i]

    def update_q_value(self, state, action, piece, reward, next_state, next_moves):
        next_q_values = []
        for next_action in next_moves:
            next_q_values.append(self.get_q_value(next_state, next_action, piece))
        
        max_next_q = max(next_q_values) if next_q_values else 0.0
        state_tuple = tuple(state.flatten())

        self.q_table[(state_tuple, action, piece)] += self.alpha * (reward + self.discount_factor * max_next_q - self.q_table[(state_tuple, action, piece)])


In [39]:
class train():
    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent):
        self.board = tic_tac_toe
        self.q_agent = q_agent
    
    
    def play_one_game(self, piece):
        game_over = False
        win_piece = 0
        self.board.reset_matrix()
        if piece == 1:
            piece_enemy = 2
            while not game_over:
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state,avaible_moves,piece)
                i, j = self.board.number_ij(action)
                self.board.move(i,j,piece)
                if w != 4:
                    win_piece = w
                    break
                self.board.get_random_move(piece_enemy)
                next_moves = self.board.get_avaible_moves()
                next_state = self.board.matriz.copy()
                reward = self.board.reward_piece(piece)
               
                self.q_agent.update_q_value(state,action,piece,reward,next_state,next_moves)
                
            return win_piece
        else:
            piece_enemy = 1
            while not game_over:
                w = self.board.check_win()
                
                self.board.get_random_move(piece_enemy)
                if w != 4:
                    win_piece = w
                    break
                
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state,avaible_moves,piece)
                i, j = self.board.number_ij(action)
                self.board.move(i,j,piece)
                if w != 4:
                    win_piece = w
                    break
                
                self.board.get_random_move(piece_enemy)

                if w != 4:
                    win_piece = w
                    break
                
                next_moves = self.board.get_avaible_moves()
                next_state = self.board.matriz.copy()
                reward = self.board.reward_piece(piece)
               
                self.q_agent.update_q_value(state,action,piece,reward,next_state,next_moves)
                
            return win_piece
            
    def play_ia_vs_ia(self):
        game_over = False
        self.board.reset_matrix()
        ia_x = 1
        ia_o = 2
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            state_x = self.board.matriz.copy()
            avaible_moves_x = self.board.get_avaible_moves()
            action_x = self.q_agent.choose_move(state_x,avaible_moves_x,ia_x)
            i, j = self.board.number_ij(action_x)
            self.board.move(i,j,ia_x) # x play
            if w != 4:
                win_piece = w
                break

            state_o = self.board.matriz.copy()
            avaible_moves_o = self.board.get_avaible_moves()
            action_o = self.q_agent.choose_move(state_o,avaible_moves_o,ia_o)
            i, j = self.board.number_ij(action_o)
            self.board.move(i,j,ia_o) # o play
            if w != 4:
                win_piece = w
                break

            #x q_Table update 
            next_moves_x = self.board.get_avaible_moves()
            next_state_x = self.board.matriz.copy()
            reward_x = self.board.reward_piece(ia_x)
            self.q_agent.update_q_value(state_x,action_x,ia_x,reward_x,next_state_x,next_moves_x)
            

            #x play again to update q table o
            avaible_moves_x = self.board.get_avaible_moves()
            action_x = self.q_agent.choose_move(next_state_x,avaible_moves_x,ia_x)# next_state x is the atual state of board
            i, j = self.board.number_ij(action_x)
            self.board.move(i,j,ia_x) # x play
            if w != 4:
                win_piece = w
                break

            #o qtable update
            next_moves_o = self.board.get_avaible_moves()
            next_state_o = self.board.matriz.copy()
            reward_o = self.board.reward_piece(ia_x)
            self.q_agent.update_q_value(state_o,action_o,ia_o,reward_o,next_state_o,next_moves_o)

        return win_piece
    
    
    def run(self, n):
        wins_x = []
        wins_o = []
        wins_ia = []
        print(f'Playing {n} games with X')
        for i in tqdm.tqdm(range(0,n)):
            wins_x.append(self.play_one_game(piece=1))
        print(f'Playing {n} games with O')
        for i in tqdm.tqdm(range(0,n)):
            wins_o.append(self.play_one_game(piece=2))

        print(f'Playing {n} games ia vs ia')
        for i in tqdm.tqdm(range(0,n)):
            wins_ia.append(self.play_ia_vs_ia())
        return wins_x,wins_o, wins_ia

In [14]:
class Game():
    def __init__(self, tic_tac_toe: tic_tac_toe_model, q_agent: QlearningAgent):
        self.board = tic_tac_toe
        self.q_agent = q_agent
    

    def ia_vs_ia(self):
        game_over = False
        self.board.reset_matrix()
        while not game_over:
            w = self.board.check_win()
            if w != 4:
                win_piece = w
                break
            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state,avaible_moves,1)
            i, j = self.board.number_ij(action)
            self.board.move(i,j,1)
            if w != 4:
                win_piece = w
                break

            state = self.board.matriz.copy()
            avaible_moves = self.board.get_avaible_moves()
            action = self.q_agent.choose_move(state,avaible_moves,2)
            i, j = self.board.number_ij(action)
            self.board.move(i,j,2)

            
        return win_piece
        
    def run_ia_vs_ia(self,n):
        games = []
        for i in tqdm.tqdm(range(0,n)):
            w = self.ia_vs_ia()
            games.append(w)
        return games
    
    def ia_vs_user(self):
        print('Menu')
        print("1 - Iniciar aleatorio")
        print("2 - Escolher peça [X-O]")
        m1 = int(input())
        while m1 != 1 and m1 !=2:
            print('Insira um valor valido 1-2')
            print("1 - Iniciar aleatorio")
            print("2 - Escolher peça [X-O]")
            m1 = int(input())

        game_over = False
        self.board.reset_matrix()
        pieces = [1,2]
        win_piece = 0
        if m1 == 1 :
            user = random.choice(pieces)    
        else:
            print('1 - X \n 2 - O')
            user = int(input())
            while user != 1 and user !=2:
                print("Insira um valor válido 1-2")
                print('1 - X \n 2 - O')
                user = int(input())
                

        ia = 2 if user == 1 else 1
        print("Começo de jogo")
        self.board.print_game()
        if ia == 1:
            while not game_over:
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state,avaible_moves,ia)
                i, j = self.board.number_ij(action)
                print('Jogada da IA - X')
                self.board.move(i,j,ia)
                self.board.print_game()
                if w != 4:
                    win_piece = w
                    break
                print("Jogue Usuario - O")
                mv = int(input())
                i,j = self.board.number_ij(mv)
                self.board.move(i,j,user)
                self.board.print_game()

        else:
             while not game_over:
                w = self.board.check_win()
                if w != 4:
                    win_piece = w
                    break

                print('Jogue Usuario - X')
                mv = int(input())
                i,j = self.board.number_ij(mv)
                self.board.move(i,j,user)
                self.board.print_game()

                print('Jogada da IA - O')
                state = self.board.matriz.copy()
                avaible_moves = self.board.get_avaible_moves()
                action = self.q_agent.choose_move(state,avaible_moves,ia)
                i, j = self.board.number_ij(action)
                self.board.move(i,j,ia)
                self.board.print_game()

                if w != 4:
                    win_piece = w
                    break
               

        if win_piece == ia :
            print("IA venceu Humano Fraco")
        elif win_piece == user :
            print("Humano venceu, esta preparado para a revolução?")
        else:
            print('Deu velha, mas a I.A segue aprendendo e melhorando e você?')
        return win_piece
        

In [58]:
board = tic_tac_toe_model(3)
q_agent = QlearningAgent(alpha=0.1, epsilon=0.5, discount_factor=0.9)
exp = train(board,q_agent)
n = 100
wins_x,wins_o ,wins_ia = exp.run(n)


Playing 100 games with X


100%|██████████| 100/100 [00:00<00:00, 1525.39it/s]


Playing 100 games with O


100%|██████████| 100/100 [00:00<00:00, 2084.28it/s]


Playing 100 games ia vs ia


100%|██████████| 100/100 [00:00<00:00, 1483.84it/s]


In [59]:
print("IA VS IA")
games = np.array(wins_ia)
print('IA[X] vs IA[O]')
print(f"Jogos ganhos[x]: {np.where(games==1)[0].shape[0]}\n",
      f"Jogos perdidos[x]: {np.where(games==2)[0].shape[0]}\n",
      f"Jogos empatados: {np.where(games==3)[0].shape[0]}\n")


print('IA[X] vs Aleatorio ')
games = np.array(wins_x)
print(f"Jogos ganhos: {np.where(games==1)[0].shape[0]}\n",
      f"Jogos perdidos: {np.where(games==2)[0].shape[0]}\n",
      f"Jogos empatados: {np.where(games==3)[0].shape[0]}\n")

print('IA[0] vs Aleatorio ')
games = np.array(wins_o)
print(f"Jogos ganhos: {np.where(games==1)[0].shape[0]}\n",
      f"Jogos perdidos: {np.where(games==2)[0].shape[0]}\n",
      f"Jogos empatados: {np.where(games==3)[0].shape[0]}\n")



IA VS IA
IA[X] vs IA[O]
Jogos ganhos[x]: 95
 Jogos perdidos[x]: 5
 Jogos empatados: 0

IA[X] vs Aleatorio 
Jogos ganhos: 63
 Jogos perdidos: 28
 Jogos empatados: 9

IA[0] vs Aleatorio 
Jogos ganhos: 89
 Jogos perdidos: 11
 Jogos empatados: 0



In [19]:
game = Game(board, q_agent)

In [20]:
game.ia_vs_user()

Menu
1 - Iniciar aleatorio
2 - Escolher peça [X-O]
1 - X 
 2 - O
Começo de jogo
  |   |  
---------
  |   |  
---------
  |   |  
---------
Jogada da IA - X
  |   |  
---------
  | X |  
---------
  |   |  
---------
Jogue Usuario - O
O |   |  
---------
  | X |  
---------
  |   |  
---------
Jogada da IA - X
O |   | X
---------
  | X |  
---------
  |   |  
---------
Jogue Usuario - O
O |   | X
---------
  | X |  
---------
O |   |  
---------
Jogada da IA - X
O |   | X
---------
X | X |  
---------
O |   |  
---------
Jogue Usuario - O
O |   | X
---------
X | X | O
---------
O |   |  
---------
Jogada da IA - X
O |   | X
---------
X | X | O
---------
O | X |  
---------
Jogue Usuario - O
O |   | X
---------
X | X | O
---------
O | X |  
---------
Jogada da IA - X
O | X | X
---------
X | X | O
---------
O | X |  
---------
Jogue Usuario - O
O | X | X
---------
X | X | O
---------
O | X |  
---------
IA venceu Humano Fraco


1

In [None]:
game.run_ia_vs_ia(10)