<a href="https://colab.research.google.com/github/Sonalkumari05/tic_tac_toe_game/blob/main/reinforcement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
import json
import random

In [63]:
# Constants
EMPTY = ' '
PLAYER_X = 'X'
PLAYER_O = 'O'
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EXPLORATION_PROBABILITY = 0.2
NUM_EPISODES = 10000


In [64]:
def is_winner(board, player):
    # Check rows and columns
    for i in range(3):
        if all(cell == player for cell in board[i]) or all(board[j][i] == player for j in range(3)):
            return True

    # Check diagonals
    if all(board[i][i] == player for i in range(3)) or all(board[i][2 - i] == player for i in range(3)):
        return True

    # No win found
    return False

In [65]:
def is_full(board):
    # Iterate through each row and cell in the row using nested loops
    # Check if every cell is not empty
    return all(cell != EMPTY for row in board for cell in row)

In [66]:
def get_empty_cells(board):
    # Use list comprehension to generate a list of empty cell coordinates
    # Coordinates are represented as tuples (i, j) where i is the row index and j is the column index
    return [(i, j) for i in range(3) for j in range(3) if board[i][j] == EMPTY]

In [67]:
def get_state_key(board):
    # Convert the board to a string to create a unique key for the current state
    return str(board)


In [68]:
def get_possible_moves(board):
    # Utilize the get_empty_cells function to obtain the list of possible moves
    return get_empty_cells(board)


In [69]:
def choose_action(board, q_values):
    # Exploration: Randomly choose a move with a probability of EXPLORATION_PROBABILITY
    if random.uniform(0, 1) < EXPLORATION_PROBABILITY:
        return random.choice(get_possible_moves(board))
    # Exploitation: Choose the move with the highest Q-value based on the current state
    else:
        state_key = get_state_key(board)
        possible_moves = get_possible_moves(board)
        # Use the max function to find the move with the highest Q-value (or default to 0 if not present)
        return max(possible_moves, key=lambda move: q_values.get((state_key, move), 0))


In [70]:
def update_q_values(q_values, state, action, reward, next_state):
    # Convert the current state and action to keys
    state_key = get_state_key(state)
    action_key = tuple(action)

    # Q-learning update rule
    q_values[state_key, action_key] = q_values.get((state_key, action_key), 0) + \
        LEARNING_RATE * (reward + DISCOUNT_FACTOR * max((q_values.get((get_state_key(next_state), m), 0) for m in get_possible_moves(next_state)), default=0) -
                        q_values.get((state_key, action_key), 0))


In [71]:
def save_q_values(q_values):
    # Convert tuple keys to strings for JSON serialization
    q_values_str_keys = {str(key): value for key, value in q_values.items()}

    # Write the Q-values to a JSON file with indentation for readability
    with open('q_values.json', 'w') as file:
        json.dump(q_values_str_keys, file, indent=2)

In [72]:
def train_q_learning():
    # Initialize an empty dictionary to store Q-values
    q_values = {}

    # Iterate over the specified number of episodes
    for episode in range(NUM_EPISODES):
        # Initialize a new empty Tic-Tac-Toe board
        board = [[EMPTY for _ in range(3)] for _ in range(3)]

        # Start the episode with Player X
        current_player = PLAYER_X

        # Continue playing until the board is full or there is a winner
        while not is_full(board) and not is_winner(board, PLAYER_X) and not is_winner(board, PLAYER_O):
            # Choose an action using the Q-learning agent
            action = choose_action(board, q_values)

            # Update the board with the chosen action for the current player
            i, j = action
            board[i][j] = current_player

            # Determine the reward based on the game state
            if is_winner(board, current_player):
                reward = 1
            elif is_full(board):
                reward = 0
            else:
                reward = 0

            # Update Q-values based on the Q-learning update rule
            update_q_values(q_values, board, action, reward, board)

            # Switch to the other player for the next move
            current_player = PLAYER_O if current_player == PLAYER_X else PLAYER_X

    # Save the learned Q-values to a JSON file
    save_q_values(q_values)

    # Return the learned Q-values
    return q_values

In [73]:
trained_q_values = train_q_learning()

In [74]:
def choose_action(board, q_values):
    # Convert the current state to a string key
    state_key = str(board)

    # Get a list of possible moves (empty cells)
    possible_moves = get_empty_cells(board)

    # Choose the move with the highest Q-value based on the current state
    return max(possible_moves, key=lambda move: q_values.get((state_key, move), 0))


In [75]:
def human_move(board):
    while True:
        try:
            # Prompt the user to enter the row and column
            row = int(input("Enter the row (0, 1, or 2): "))
            col = int(input("Enter the column (0, 1, or 2): "))

            # Check if the selected cell is empty
            if board[row][col] == EMPTY:
                return row, col
            else:
                print("Cell is already occupied. Try again.")
        except ValueError:
            print("Invalid input. Please enter a number.")


In [76]:
def print_board(board):
    for row in board:
        # Print each row with cell values separated by '|'
        print('|'.join(row))
        # Print a line of dashes to separate rows
        print('-' * 5)


In [77]:
def load_q_values(filename):
    with open(filename, 'r') as file:
        # Use the json.load function to read Q-values from the file
        return json.load(file)


In [78]:
def play_game(q_values):
    board = [[EMPTY for _ in range(3)] for _ in range(3)]

    while True:
        print_board(board)

        # Human player's move
        human_row, human_col = human_move(board)
        board[human_row][human_col] = PLAYER_X

        if is_winner(board, PLAYER_X):
            print_board(board)
            print("Congratulations! You win!")
            break
        elif is_full(board):
            print_board(board)
            print("It's a tie!")
            break

        # AI's move
        print("AI's turn:")
        ai_row, ai_col = choose_action(board, q_values)
        board[ai_row][ai_col] = PLAYER_O

        if is_winner(board, PLAYER_O):
            print_board(board)
            print("AI wins! Better luck next time.")
            break
        elif is_full(board):
            print_board(board)
            print("It's a tie!")
            break

In [79]:
# Load Q-values from the JSON file
q_values = load_q_values('q_values.json')

# Start the game
play_game(q_values)

 | | 
-----
 | | 
-----
 | | 
-----
Enter the row (0, 1, or 2): 0
Enter the column (0, 1, or 2): 0
AI's turn:
X|O| 
-----
 | | 
-----
 | | 
-----
Enter the row (0, 1, or 2): 1
Enter the column (0, 1, or 2): 1
AI's turn:
X|O|O
-----
 |X| 
-----
 | | 
-----
Enter the row (0, 1, or 2): 1
Enter the column (0, 1, or 2): 0
AI's turn:
X|O|O
-----
X|X|O
-----
 | | 
-----
Enter the row (0, 1, or 2): 2
Enter the column (0, 1, or 2): 0
X|O|O
-----
X|X|O
-----
X| | 
-----
Congratulations! You win!
