<a href="https://colab.research.google.com/github/Dhruv-958/BE/blob/main/ML_TicTacToe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import numpy as np
import random

In [28]:
import numpy as np
import random

# a. Setting up the environment
class TicTacToeEnv:
    def __init__(self):
        self.board = [' ' for _ in range(9)]
        self.current_player = 'X'

    def reset(self):
        self.board = [' ' for _ in range(9)]
        self.current_player = 'X'
        return self.get_state()

    def get_state(self):
        return ''.join(self.board)

    def step(self, action):
        if self.board[action] == ' ':
            self.board[action] = self.current_player
            done, winner = self.check_game_over()
            reward = 1 if winner == self.current_player else 0 if winner is None else -1
            self.current_player = 'O' if self.current_player == 'X' else 'X'
            return self.get_state(), reward, done
        else:
            return self.get_state(), -10, True  # Invalid move

    def check_game_over(self):
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Columns
            [0, 4, 8], [2, 4, 6]  # Diagonals
        ]
        for combo in winning_combinations:
            if self.board[combo[0]] == self.board[combo[1]] == self.board[combo[2]] != ' ':
                return True, self.board[combo[0]]
        if ' ' not in self.board:
            return True, None
        return False, None

    def render(self):
        print("-------------")
        for i in range(3):
            print("|", self.board[i*3], "|", self.board[i*3+1], "|", self.board[i*3+2], "|")
            print("-------------")

# b. Defining the Tic-Tac-Toe game
class TicTacToeGame:
    def __init__(self):
        self.env = TicTacToeEnv()

    def play_game(self, player1, player2):
        state = self.env.reset()
        done = False
        while not done:
            action = player1.choose_action(state) if self.env.current_player == 'X' else player2.choose_action(state)
            next_state, reward, done = self.env.step(action)
            if self.env.current_player == 'X':
                player2.update(state, action, reward, next_state)
            else:
                player1.update(state, action, reward, next_state)
            state = next_state
        return reward

# c. Building the reinforcement learning model
class QLearningAgent:
    def __init__(self, epsilon=0.1, alpha=0.5, gamma=0.9):
        self.q_table = {}
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor

    def get_q_value(self, state, action):
        return self.q_table.get((state, action), 0.0)

    def choose_action(self, state):
        if random.random() < self.epsilon:
            return random.choice([i for i, spot in enumerate(state) if spot == ' '])
        else:
            q_values = [self.get_q_value(state, action) for action in range(9) if state[action] == ' ']
            max_q = max(q_values)
            best_actions = [action for action in range(9) if state[action] == ' ' and self.get_q_value(state, action) == max_q]
            return random.choice(best_actions)

    def update(self, state, action, reward, next_state):
        best_next_action = max([self.get_q_value(next_state, a) for a in range(9) if next_state[a] == ' '], default=0)
        td_target = reward + self.gamma * best_next_action
        td_error = td_target - self.get_q_value(state, action)
        self.q_table[(state, action)] = self.get_q_value(state, action) + self.alpha * td_error

class HumanPlayer:
    def choose_action(self, state):
        while True:
            try:
                action = int(input("Enter your move (0-8): "))
                if 0 <= action <= 8 and state[action] == ' ':
                    return action
                else:
                    print("Invalid move. Try again.")
            except ValueError:
                print("Invalid input. Please enter a number between 0 and 8.")

    def update(self, state, action, reward, next_state):
        pass  # Human doesn't need to update

# d. Training the model
def train_agent(episodes=10000):
    agent = QLearningAgent()
    game = TicTacToeGame()
    for _ in range(episodes):
        game.play_game(agent, agent)
    return agent

# e. Testing the model
def test_agent(agent, episodes=1000):
    game = TicTacToeGame()
    random_player = QLearningAgent(epsilon=1.0)  # Always plays randomly
    wins = 0
    for _ in range(episodes):
        reward = game.play_game(agent, random_player)
        if reward == 1:
            wins += 1
    return wins / episodes

# f. Human vs Computer gameplay
def human_vs_computer(agent):
    env = TicTacToeEnv()
    human = HumanPlayer()
    state = env.reset()
    done = False

    print("Welcome to Tic-Tac-Toe!")
    print("You are 'X', and the computer is 'O'.")
    print("Enter your moves using numbers 0-8, corresponding to the positions on the board:")
    print("0 | 1 | 2")
    print("3 | 4 | 5")
    print("6 | 7 | 8")
    print("\nLet's begin!")

    while not done:
        env.render()
        if env.current_player == 'X':
            action = human.choose_action(state)
        else:
            action = agent.choose_action(state)
            print(f"Computer chose: {action}")

        state, reward, done = env.step(action)

        if done:
            env.render()
            if reward == 1:
                winner = "You" if env.current_player == 'O' else "Computer"
                print(f"{winner} win!")
            elif reward == 0:
                print("It's a draw!")
            return "human" if winner == "You" else "computer" if winner == "Computer" else "draw"

# Main execution
if __name__ == "__main__":
    trained_agent = train_agent()
    win_rate = test_agent(trained_agent)
    print(f"Agent win rate against random player: {win_rate:.2%}")

    play_again = 'y'
    games_played = 0
    computer_wins = 0
    human_wins = 0
    draws = 0

    while play_again.lower() == 'y':
        result = human_vs_computer(trained_agent)
        games_played += 1
        if result == "computer":
            computer_wins += 1
        elif result == "human":
            human_wins += 1
        else:
            draws += 1

        computer_win_rate = computer_wins / games_played
        print(f"\nGames played: {games_played}")
        print(f"Computer wins: {computer_wins}")
        print(f"Human wins: {human_wins}")
        print(f"Draws: {draws}")
        print(f"Computer win rate: {computer_win_rate:.2%}")
        play_again = input("Do you want to play again? (y/n): ")

    print("Thanks for playing!")


Agent win rate against random player: 92.30%
Welcome to Tic-Tac-Toe!
You are 'X', and the computer is 'O'.
Enter your moves using numbers 0-8, corresponding to the positions on the board:
0 | 1 | 2
3 | 4 | 5
6 | 7 | 8

Let's begin!
-------------
|   |   |   |
-------------
|   |   |   |
-------------
|   |   |   |
-------------
Enter your move (0-8): 0
-------------
| X |   |   |
-------------
|   |   |   |
-------------
|   |   |   |
-------------
Computer chose: 1
-------------
| X | O |   |
-------------
|   |   |   |
-------------
|   |   |   |
-------------
Enter your move (0-8): 8
-------------
| X | O |   |
-------------
|   |   |   |
-------------
|   |   | X |
-------------
Computer chose: 6
-------------
| X | O |   |
-------------
|   |   |   |
-------------
| O |   | X |
-------------
Enter your move (0-8): 2
-------------
| X | O | X |
-------------
|   |   |   |
-------------
| O |   | X |
-------------
Computer chose: 5
-------------
| X | O | X |
-------------
|   |   |