Practical No: 6

Build a Tic-Tac-Toe game using reinforcement learning in Python

In [1]:
# Cell 1: Import necessary libraries
import numpy as np
import random
from collections import defaultdict


In [2]:
# Cell 2: Define the Tic-Tac-Toe environment and rules
class TicTacToe:
    """A 3x3 Tic-Tac-Toe board stored as a flat list of nine cells.

    Cells are indexed 0-8 in row-major order and hold ' ' while empty.
    ``current_winner`` is None until a move completes a line, after which
    it holds the letter that made that move.
    """

    def __init__(self):
        self.board = [' ' for _ in range(9)]
        self.current_winner = None

    def print_board(self):
        """Print the board as three rows of the form '| a | b | c |'."""
        for row in [self.board[i*3:(i+1)*3] for i in range(3)]:
            print('| ' + ' | '.join(row) + ' |')

    def available_moves(self):
        """Return the indices of all empty cells in ascending order."""
        return [i for i, spot in enumerate(self.board) if spot == ' ']

    def empty_squares(self):
        """Return True while at least one cell is still empty."""
        return ' ' in self.board

    def make_move(self, square, letter):
        """Place ``letter`` on ``square`` if that is a legal move.

        Args:
            square: Cell index; only 0-8 are on the board.
            letter: Mark to place, e.g. 'X' or 'O'.

        Returns:
            True if the mark was placed; False if ``square`` is off the board
            or already occupied. When the move completes a line,
            ``current_winner`` is set to ``letter``.
        """
        # Reject off-board indices explicitly. A negative index would
        # otherwise wrap around to the end of the list, and the row slice in
        # winner() would come out empty, making all() vacuously True and
        # recording a win that never happened. Indices past the end would
        # raise IndexError instead of reporting an invalid move.
        if not 0 <= square < len(self.board):
            return False
        if self.board[square] != ' ':
            return False
        self.board[square] = letter
        if self.winner(square, letter):
            self.current_winner = letter
        return True

    def winner(self, square, letter):
        """Return True if ``letter`` fills a line that passes through ``square``.

        Only the row and column containing ``square`` are inspected, plus both
        diagonals when ``square`` is even (corners and centre are the only
        cells that lie on a diagonal).
        """
        # Check row
        row_ind = square // 3
        row = self.board[row_ind*3:(row_ind+1)*3]
        if all(s == letter for s in row):
            return True
        # Check column
        col_ind = square % 3
        col = [self.board[col_ind+i*3] for i in range(3)]
        if all(s == letter for s in col):
            return True
        # Check diagonals
        if square % 2 == 0:
            diagonal1 = [self.board[i] for i in [0, 4, 8]]
            diagonal2 = [self.board[i] for i in [2, 4, 6]]
            if all(s == letter for s in diagonal1) or all(s == letter for s in diagonal2):
                return True
        return False

    def reset(self):
        """Clear every cell and forget any recorded winner."""
        self.board = [' ' for _ in range(9)]
        self.current_winner = None


In [3]:
# Cell 3: Define Q-Learning agent for AI
class QLearningAgent:
    """Tabular Q-learning agent for a nine-cell board.

    The Q-table maps a state key (the board joined into a 9-character string)
    to an array of nine action values, one per cell. Unseen states start at
    all zeros.
    """

    def __init__(self, alpha=0.3, gamma=0.9, epsilon=0.2):
        self.q_table = defaultdict(lambda: np.zeros(9))
        self.alpha = alpha    # Learning rate
        self.gamma = gamma    # Discount factor
        self.epsilon = epsilon  # Exploration rate

    def get_state(self, game):
        """Return the board of ``game`` joined into a single string key."""
        return ''.join(game.board)

    def choose_action(self, game):
        """Pick a move for the current position using an epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random legal move is
        returned; otherwise one of the legal moves with the highest Q-value
        is returned, with ties broken at random.

        Raises:
            IndexError or ValueError (from the underlying random.choice / max
            calls) if ``game`` has no available moves.
        """
        state = self.get_state(game)
        legal_moves = game.available_moves()  # queried once instead of three times
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(legal_moves)
        q_values = self.q_table[state]
        max_q = max(q_values[a] for a in legal_moves)
        max_actions = [a for a in legal_moves if q_values[a] == max_q]
        return random.choice(max_actions)

    def learn(self, state, action, reward, next_state, done):
        """Apply one Q-learning update to ``Q[state][action]``.

        Args:
            state: Key of the state in which ``action`` was taken.
            action: Index (0-8) of the cell that was played.
            reward: Immediate reward for the transition.
            next_state: Key of the resulting state, in the string format
                produced by ``get_state`` (' ' marks an empty cell).
            done: True if the episode ended; no future value is added then.
        """
        if done:
            future = 0
        else:
            next_q = self.q_table[next_state]
            # Bootstrap only from moves that are actually playable in the
            # next state. Occupied cells are never updated and stay at 0, so
            # including them would stop the estimate from ever dropping
            # below 0 even when every legal continuation is bad.
            playable = [
                i for i, cell in enumerate(next_state)
                if cell == ' ' and i < len(next_q)
            ]
            future = max(next_q[i] for i in playable) if playable else 0
        td_target = reward + self.gamma * future
        self.q_table[state][action] += self.alpha * (td_target - self.q_table[state][action])


In [4]:
# Cell 4: Train the AI by playing many games against an opponent that picks random moves
def train(agent, episodes=10000):
    """Train ``agent`` as 'X' against an opponent that plays random legal moves as 'O'.

    Each episode starts from an empty board with the agent moving first.
    The agent's move is scored after the opponent has replied:
    1 for an agent win, 0.5 for a full board with no winner, -1 when the
    opponent's reply wins, and 0 when the game simply continues.

    Args:
        agent: Object providing ``get_state``, ``choose_action`` and ``learn``.
        episodes: Number of games to play.
    """
    board = TicTacToe()
    for _episode in range(episodes):
        board.reset()
        prev_state = agent.get_state(board)
        while True:
            # Agent's turn
            move = agent.choose_action(board)
            board.make_move(move, 'X')
            after_agent = agent.get_state(board)

            if board.current_winner == 'X':
                agent.learn(prev_state, move, 1, after_agent, True)
                break
            if not board.empty_squares():
                agent.learn(prev_state, move, 0.5, after_agent, True)
                break

            # Opponent replies with a uniformly random legal move
            reply = random.choice(board.available_moves())
            board.make_move(reply, 'O')
            after_opponent = agent.get_state(board)

            opponent_won = board.current_winner == 'O'
            agent.learn(prev_state, move, -1 if opponent_won else 0, after_opponent, opponent_won)
            if opponent_won:
                break
            # The agent's next decision is made from the position after the reply
            prev_state = after_opponent

# Create an agent with default hyperparameters and train it
TRAINING_EPISODES = 10000
agent = QLearningAgent()
train(agent, episodes=TRAINING_EPISODES)


In [5]:
# Cell 5: Play the game interactively with human input
def print_board_positions():
    """Print a reference grid showing which index (0-8) belongs to which cell."""
    print("Board positions (0-8):")
    for row_start in range(0, 9, 3):
        labels = [str(row_start + offset) for offset in range(3)]
        print('| ' + ' | '.join(labels) + ' |')

def play_human_vs_ai(agent):
    """Play one interactive console game: the agent is 'X' and moves first, the human is 'O'.

    The human is prompted for a cell index (0-8) until a legal move is
    entered; typing 'exit' ends the game early. The function prints the
    board after every move and announces the result.

    Args:
        agent: Object providing ``choose_action(game)``. If it has an
            ``epsilon`` attribute, exploration is switched off for the
            duration of the game and restored afterwards.
    """
    game = TicTacToe()
    print_board_positions()
    game.print_board()

    # Exploration is only useful while training. Left on, the agent would
    # throw away a share of its moves on random squares against the human.
    saved_epsilon = getattr(agent, 'epsilon', None)
    if saved_epsilon is not None:
        agent.epsilon = 0
    try:
        while game.empty_squares():
            # AI move
            action = agent.choose_action(game)
            game.make_move(action, 'X')
            print("\nAI's move:")
            game.print_board()
            if game.current_winner == 'X':
                print("AI wins!")
                return
            if not game.empty_squares():
                print("It's a tie!")
                return

            # Human move: keep asking until a legal square is entered
            valid_move = False
            while not valid_move:
                try:
                    human_move = input("Enter your move (0-8): ")
                    if human_move.strip().lower() == 'exit':
                        print("Game exited.")
                        return
                    human_move = int(human_move)
                    if human_move in game.available_moves():
                        game.make_move(human_move, 'O')
                        valid_move = True
                    else:
                        print("Invalid move! Position already taken or out of range.")
                except ValueError:
                    # int() could not parse the input
                    print("Invalid input! Enter a number between 0 and 8.")

            print("\nYour move:")
            game.print_board()
            if game.current_winner == 'O':
                print("You win!")
                return
    finally:
        # Restore the training-time exploration rate on every exit path
        if saved_epsilon is not None:
            agent.epsilon = saved_epsilon


In [6]:
# Cell 6: Start the interactive game (the AI plays 'X' and moves first; type 'exit' at the prompt to quit)
play_human_vs_ai(agent)


Board positions (0-8):
| 0 | 1 | 2 |
| 3 | 4 | 5 |
| 6 | 7 | 8 |
|   |   |   |
|   |   |   |
|   |   |   |

AI's move:
| X |   |   |
|   |   |   |
|   |   |   |


Enter your move (0-8):  4



Your move:
| X |   |   |
|   | O |   |
|   |   |   |

AI's move:
| X |   |   |
| X | O |   |
|   |   |   |


Enter your move (0-8):  6



Your move:
| X |   |   |
| X | O |   |
| O |   |   |

AI's move:
| X |   | X |
| X | O |   |
| O |   |   |


Enter your move (0-8):  1



Your move:
| X | O | X |
| X | O |   |
| O |   |   |

AI's move:
| X | O | X |
| X | O |   |
| O | X |   |


Enter your move (0-8):  8



Your move:
| X | O | X |
| X | O |   |
| O | X | O |

AI's move:
| X | O | X |
| X | O | X |
| O | X | O |
It's a tie!
