In [55]:
import pandas as pd
import numpy as np
import random
import tkinter as tk
from tkinter import messagebox
import pickle

## Load functions from the training code

#### Load Q table

In [56]:
def load_q_table(file_name="q_table.pkl"):
    """Load a pickled Q-table from disk, or start from a fresh one.

    Args:
        file_name (str): Path of the pickle file to read.

    Returns:
        The object stored in ``file_name``. When the file does not exist,
        the result of ``initialize_q_table()`` if such a function is defined
        in this module, otherwise an empty dict.
    """
    try:
        # SECURITY: pickle.load can execute arbitrary code. Only load files
        # that were produced by save_q_table and come from a trusted source.
        with open(file_name, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # initialize_q_table is not defined in this notebook, so calling it
        # unconditionally raised NameError on the first run (no saved table).
        # Use it only when it actually exists; otherwise start empty.
        factory = globals().get("initialize_q_table")
        return factory() if callable(factory) else {}

In [57]:
def create_tic_tac_toe():
    """Build a fresh game: an empty board plus every cell coordinate.

    Returns:
        tuple: ``(board, action_space)`` where ``board`` is a 3x3 integer
        NumPy array filled with zeros and ``action_space`` is the list of
        the nine ``(row, col)`` tuples in row-major order.
    """
    size = 3

    # Zero marks an empty cell.
    empty_board = np.zeros((size, size), dtype=int)

    # Enumerate all positions, row by row.
    all_cells = []
    for row in range(size):
        for col in range(size):
            all_cells.append((row, col))

    return empty_board, all_cells

In [58]:
def check_game_status(board):
    """Report whether the game on ``board`` is won, drawn, or still running.

    Args:
        board (np.array): 3x3 board; 1 marks the AI, -1 the human, 0 empty.

    Returns:
        1 if a line is filled with 1 (AI wins), -1 if a line is filled with
        -1 (human wins), 0 if no cell is empty (draw), otherwise None.
    """
    players = (1, -1)  # AI is tested before the human at every step

    # Rows and columns, index by index.
    for idx in range(3):
        row_line = board[idx, :]
        col_line = board[:, idx]
        for player in players:
            if (row_line == player).all() or (col_line == player).all():
                return player

    # Both diagonals.
    main_diag = np.diag(board)
    anti_diag = np.diag(np.fliplr(board))
    for player in players:
        if (main_diag == player).all() or (anti_diag == player).all():
            return player

    # No winner: a completely filled board is a draw.
    if (board != 0).all():
        return 0

    # Empty cells remain, so play continues.
    return None

In [59]:
import pickle
def save_q_table(q_table, file_name="q_table.pkl"):
    """Write the Q-table to ``file_name`` using pickle.

    Args:
        q_table: The object to serialize (the Q-table).
        file_name (str): Destination path; an existing file is overwritten.
    """
    with open(file_name, "wb") as out_file:
        pickle.Pickler(out_file).dump(q_table)

In [60]:
def state_to_key(board):
    """Turn a board into the string used as its Q-table key.

    Args:
        board (np.array): The game board.

    Returns:
        str: The ``str()`` of every cell, concatenated in flattened
        (row-major) order, e.g. ``"10-1000010"``.
    """
    cells = board.flatten()
    return "".join(str(value) for value in cells)

In [61]:
def epsilon_greedy_action(q_table, state_key, epsilon, valid_actions):
    """Pick an action with an epsilon-greedy policy.

    Args:
        q_table (dict): Maps state keys to ``{action: q_value}`` dicts.
        state_key: Key of the current state.
        epsilon (float): Probability threshold for a random (exploring) move.
        valid_actions (list): Actions that may be chosen.

    Returns:
        A random element of ``valid_actions`` when exploring or when the
        state is not in the table; otherwise the valid action with the
        highest stored Q-value (missing actions count as 0, first wins ties).
    """
    exploring = random.random() < epsilon

    if not exploring and state_key in q_table:
        action_values = q_table[state_key]

        def stored_value(action):
            # Actions never seen in this state default to a value of 0.
            return action_values.get(action, 0)

        return max(valid_actions, key=stored_value)

    # Either exploring, or nothing is known about this state yet.
    return random.choice(valid_actions)

In [62]:
def update_q_value(q_table, state_key, action, reward, next_state_key, alpha, gamma, valid_actions):
    """Apply one tabular Q-learning update to ``q_table`` in place.

    Implements
    ``Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))``.

    Args:
        q_table (dict): Maps state keys to ``{action: q_value}`` dicts.
        state_key: Key of the state in which ``action`` was taken.
        action: The action being updated.
        reward (float): Reward received for the transition.
        next_state_key: Key of the resulting state, or ``None`` when the
            transition ended the game (no future value is bootstrapped).
        alpha (float): Learning rate.
        gamma (float): Discount factor.
        valid_actions (list): Actions used to zero-initialise states that
            are not in the table yet.
    """
    if state_key not in q_table:
        q_table[state_key] = {a: 0 for a in valid_actions}

    if next_state_key is None:
        # Terminal transition: there is no successor state, so do not create
        # a bogus ``None`` entry in the table; the future value is simply 0.
        max_future_q = 0
    else:
        if next_state_key not in q_table:
            q_table[next_state_key] = {a: 0 for a in valid_actions}
        # ``default=0`` covers a successor with no recorded actions, which
        # would otherwise make max() raise ValueError.
        max_future_q = max(q_table[next_state_key].values(), default=0)

    # An action not seen before in a known state starts from 0 instead of
    # raising KeyError.
    current_q = q_table[state_key].get(action, 0)
    q_table[state_key][action] = current_q + alpha * (reward + gamma * max_future_q - current_q)

## GUI

### Code for GUI

##### We have to define functions for the GUI

##### 1. Reset Board

In [63]:
"""
Resets the board and GUI buttons for a new game.

para:
    gui_buttons (list): List of button widgets.
    board (np.array): The game board.
"""
def reset_board(gui_buttons, board):
    """Prepare the GUI and the board for a new game.

    Args:
        gui_buttons (list): 3x3 nested list of button widgets; each one gets
            its text cleared and is re-enabled.
        board (np.array): The game board; zeroed in place.
    """
    cells = ((r, c) for r in range(3) for c in range(3))
    for r, c in cells:
        gui_buttons[r][c].config(text="", state=tk.NORMAL)

    # Zero the array in place so every holder of this board sees the reset.
    board.fill(0)

###### 2. Handle human move

In [64]:
"""
Handles button click (human's move).

Args:
    row (int): Row index of the clicked button.
    col (int): Column index of the clicked button.
    gui_buttons (list): List of button widgets.
    board (np.array): The game board.
    q_table (dict): The Q-table.
    ai_player (int): Value representing the AI player.
    human_player (int): Value representing the human player.
    
"""
def handle_click(row, col, gui_buttons, board, q_table, ai_player, human_player):
    """Play the human's move on the clicked cell, then let the AI reply.

    A click on an occupied cell is ignored. After each move the game status
    is checked; as soon as the game is over the result is shown (which also
    resets the board) and no further move is made.

    Args:
        row (int): Row index of the clicked button.
        col (int): Column index of the clicked button.
        gui_buttons (list): 3x3 nested list of button widgets.
        board (np.array): The game board, modified in place.
        q_table (dict): The Q-table.
        ai_player (int): Board value representing the AI.
        human_player (int): Board value representing the human.
    """
    # Occupied cell: nothing to do.
    if board[row, col] != 0:
        return

    # Human move.
    board[row, col] = human_player
    gui_buttons[row][col].config(text="X", state=tk.DISABLED)

    outcome = check_game_status(board)
    if outcome is None:
        # Game still running: the AI answers, then the status is re-checked.
        ai_move(board, gui_buttons, q_table, ai_player)
        outcome = check_game_status(board)

    if outcome is not None:
        show_result(outcome, gui_buttons, board, q_table, ai_player, human_player)

###### 3. Handle AI move

In [65]:
"""
Handles the AI's move.

Args:
    board (np.array): The game board.
    gui_buttons (list): List of button widgets.
    q_table (dict): The Q-table.
    ai_player (int): Value representing the AI player.
"""
def ai_move(board, gui_buttons, q_table, ai_player, epsilon=0.01):
    """Let the AI choose an empty cell and play it.

    Args:
        board (np.array): The game board, modified in place.
        gui_buttons (list): 3x3 nested list of button widgets.
        q_table (dict): The Q-table used to rank the available moves.
        ai_player (int): Board value representing the AI.
        epsilon (float): Exploration probability passed to
            ``epsilon_greedy_action``; defaults to the previously hard-coded
            0.01. Pass 0 for purely greedy play.

    Returns:
        None. When the board has no empty cell, nothing is changed.
    """
    valid_actions = [(i, j) for i in range(3) for j in range(3) if board[i, j] == 0]
    if not valid_actions:
        # A full board has no legal move; choosing from an empty list would
        # raise inside random.choice()/max().
        return

    state_key = state_to_key(board)
    row, col = epsilon_greedy_action(q_table, state_key, epsilon=epsilon, valid_actions=valid_actions)

    board[row, col] = ai_player
    gui_buttons[row][col].config(text="O", state=tk.DISABLED)

#### 4. Display game Result

In [66]:
"""
Displays the game result and resets the board.

Args:
    status (int): The game status (-1 for loss, 0 for draw, 1 for win).
    gui_buttons (list): List of button widgets.
    board (np.array): The game board.
    q_table (dict): The Q-table.
    ai_player (int): Value representing the AI player.
    human_player (int): Value representing the human player.
"""
def show_result(status, gui_buttons, board, q_table, ai_player, human_player):
    """Announce the game result, update the Q-table, and reset the board.

    Args:
        status (int): The game status; compared against ``ai_player`` and
            ``human_player``, any other value is reported as a draw.
        gui_buttons (list): 3x3 nested list of button widgets.
        board (np.array): The game board.
        q_table (dict): The Q-table.
        ai_player (int): Board value representing the AI.
        human_player (int): Board value representing the human.
    """
    # The Q-table drives the AI's move selection (ai_move takes the action
    # with the highest Q-value), so rewards are from the AI's point of view:
    # +1 when the AI wins, -1 when it loses. The previous code had these two
    # signs swapped.
    if status == ai_player:
        messagebox.showinfo("Result", "AI wins!")
        reward = 1
    elif status == human_player:
        messagebox.showinfo("Result", "You win!")
        reward = -1
    else:
        messagebox.showinfo("Result", "It's a draw!")
        reward = 0

    # Update Q-table based on the game result
    update_q_table(q_table, board, reward, ai_player, human_player)

    # Reset board for a new game
    reset_board(gui_buttons, board)

##### 5. Update Q table

In [67]:
"""
Updates the Q-table at the end of a game.

Args:
    q_table (dict): The Q-table.
    board (np.array): The game board.
    reward (float): The game reward.
    ai_player (int): Value representing the AI player.
    human_player (int): Value representing the human player.
"""
def update_q_table(q_table, board, reward, ai_player, human_player, alpha=0.1):
    """Update the Q-table entry of the final board at the end of a game.

    Every still-empty cell of the final board is treated as an action and
    its value is moved towards ``reward``:
    ``Q <- Q + alpha * (reward - Q)``. The game is over, so no future value
    is bootstrapped and no successor state is added to the table.

    NOTE(review): this credits the moves that were *not* played on the final
    board rather than the AI's last actual move; the move history is not
    available here, so that behaviour is kept. Confirm it is intended.

    Args:
        q_table (dict): Maps state keys to ``{action: q_value}`` dicts.
        board (np.array): The final game board.
        reward (float): The game reward.
        ai_player (int): Board value representing the AI (unused here).
        human_player (int): Board value representing the human (unused here).
        alpha (float): Learning rate; defaults to the previously hard-coded 0.1.
    """
    state_key = state_to_key(board)
    valid_actions = [(i, j) for i in range(3) for j in range(3) if board[i, j] == 0]

    # Make sure the state has an entry, even when no cell is empty (draw).
    action_values = q_table.setdefault(state_key, {})

    for action in valid_actions:
        # Unseen actions start from 0 instead of raising KeyError.
        current_q = action_values.get(action, 0)
        # Terminal update: passing ``None`` as the next state used to insert
        # a ``None`` key into the table, which was then pickled with it.
        action_values[action] = current_q + alpha * (reward - current_q)

##### 6. GUI Initialization

In [69]:
def run_game():
    """Open the tic-tac-toe window and play until it is closed.

    Loads the Q-table, builds a 3x3 grid of buttons whose clicks are routed
    to ``handle_click`` (AI plays as 1, the human as -1), runs the Tk main
    loop, and saves the Q-table once the loop returns.
    """
    q_table = load_q_table()
    board, _ = create_tic_tac_toe()

    root = tk.Tk()
    root.title("Tic-Tac-Toe")

    gui_buttons = []

    def make_handler(r, c):
        # Bind the cell coordinates now; gui_buttons is looked up at click
        # time, when the whole grid exists.
        return lambda: handle_click(r, c, gui_buttons, board, q_table, 1, -1)

    for r in range(3):
        button_row = []
        gui_buttons.append(button_row)
        for c in range(3):
            button = tk.Button(
                root,
                text="",
                font=("Helvetica", 20),
                width=5,
                height=2,
                command=make_handler(r, c),
            )
            button.grid(row=r, column=c)
            button_row.append(button)

    root.mainloop()

    # Persist whatever was learned during this session.
    save_q_table(q_table)

# Launch the GUI; run_game() returns only after its Tk main loop exits,
# at which point it saves the Q-table.
run_game()