In [1]:
!pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable




In [5]:
import numpy as np
import tensorflow as tf

# Tic-Tac-Toe board setup
class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1
        return self.board.flatten()

    def is_valid_move(self, x, y):
        return self.board[x, y] == 0

    def play_move(self, x, y):
        if not self.is_valid_move(x, y):
            raise ValueError("Invalid move")
        self.board[x, y] = self.current_player
        self.current_player = 3 - self.current_player  # Switch player

    def check_winner(self):
        for player in [1, 2]:
            for row in self.board:
                if np.all(row == player):
                    return player
            for col in self.board.T:
                if np.all(col == player):
                    return player
            if np.all(np.diag(self.board) == player) or np.all(np.diag(np.fliplr(self.board)) == player):
                return player
        if np.all(self.board != 0):
            return 0  # Draw
        return -1  # No winner yet

    def get_state(self):
        return self.board.flatten()

# Q-network
class QNetwork:
    def __init__(self):
        self.model = self.create_model()

    def create_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation='relu', input_shape=(9,)),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(9, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def predict(self, state):
        return self.model.predict(state[np.newaxis, :])[0]

    def train(self, state, target):
        self.model.fit(state[np.newaxis, :], target[np.newaxis, :], verbose=0)

# Reinforcement Learning Agent
class Agent:
    def __init__(self, player, epsilon=0.1, gamma=0.9):
        self.player = player
        self.epsilon = epsilon
        self.gamma = gamma
        self.q_network = QNetwork()

    def choose_action(self, state, valid_moves):
        if np.random.rand() < self.epsilon:
            return valid_moves[np.random.choice(len(valid_moves))]
        q_values = self.q_network.predict(state)
        return valid_moves[np.argmax([q_values[3 * x + y] for x, y in valid_moves])]

    def train(self, state, action, reward, next_state, valid_moves):
        q_values = self.q_network.predict(state)
        next_q_values = self.q_network.predict(next_state) if next_state is not None else np.zeros(9)
        best_next_q_value = max([next_q_values[3 * x + y] for x, y in valid_moves], default=0)
        q_values[3 * action[0] + action[1]] = reward + self.gamma * best_next_q_value
        self.q_network.train(state, q_values)

# Training
def train_tic_tac_toe(episodes=100):
    game = TicTacToe()
    agents = {1: Agent(player=1), 2: Agent(player=2)}
    for episode in range(episodes):
        state = game.reset()
        done = False
        history = []
        while not done:
            player = game.current_player
            valid_moves = [(x, y) for x in range(3) for y in range(3) if game.is_valid_move(x, y)]
            action = agents[player].choose_action(state, valid_moves)
            game.play_move(*action)
            next_state = game.get_state()
            winner = game.check_winner()
            if winner != -1:
                reward = 1 if winner == player else -1 if winner != 0 else 0
                for s, a, p in reversed(history):
                    agents[p].train(s, a, reward, None, [])
                    reward = -reward
                done = True
            else:
                history.append((state, action, player))
                state = next_state
    print("Training completed")
    return agents

# Testing with human input
def test_with_human(agents):
    game = TicTacToe()
    agent = agents[1]  # Agent for player 1
    state = game.reset()
    done = False
    print("Starting a new game of Tic-Tac-Toe!")
    print("You are player 2 (symbol: 2). Agent is player 1 (symbol: 1).")
    while not done:
        print("Current board:")
        print(game.board)
        if game.current_player == 1:
            # Agent's turn
            valid_moves = [(x, y) for x in range(3) for y in range(3) if game.is_valid_move(x, y)]
            action = agent.choose_action(state, valid_moves)
            print(f"Agent chooses move: {action}")
        else:
            # Human's turn
            valid_moves = [(x, y) for x in range(3) for y in range(3) if game.is_valid_move(x, y)]
            print("Your turn! Enter your move as row and column (e.g., 0 1):")
            x, y = map(int, input().split())
            while (x, y) not in valid_moves:
                print("Invalid move. Try again.")
                x, y = map(int, input().split())
            action = (x, y)

        print("Current board:")
        print(game.board)
        game.play_move(*action)
        state = game.get_state()
        winner = game.check_winner()
        if winner != -1:
            print("Final board:")
            print(game.board)
            if winner == 1:
                print("Agent wins!")
            elif winner == 2:
                print("You win!")
            else:
                print("It's a draw!")
            done = True

if __name__ == "__main__":
    # Train the agent
    agents = train_tic_tac_toe(episodes=150)

    # Test the game with a human player
    test_with_human(agents)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22

ValueError: not enough values to unpack (expected 2, got 1)

In [6]:
test_with_human(agents)

Starting a new game of Tic-Tac-Toe!
You are player 2 (symbol: 2). Agent is player 1 (symbol: 1).
Current board:
[[0 0 0]
 [0 0 0]
 [0 0 0]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Agent chooses move: (2, 1)
Current board:
[[0 0 0]
 [0 0 0]
 [0 0 0]]
Current board:
[[0 0 0]
 [0 0 0]
 [0 1 0]]
Your turn! Enter your move as row and column (e.g., 0 1):


Current board:
[[0 0 0]
 [0 0 0]
 [0 1 0]]
Current board:
[[2 0 0]
 [0 0 0]
 [0 1 0]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Agent chooses move: (0, 2)
Current board:
[[2 0 0]
 [0 0 0]
 [0 1 0]]
Current board:
[[2 0 1]
 [0 0 0]
 [0 1 0]]
Your turn! Enter your move as row and column (e.g., 0 1):
Current board:
[[2 0 1]
 [0 0 0]
 [0 1 0]]
Current board:
[[2 2 1]
 [0 0 0]
 [0 1 0]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Agent chooses move: (2, 2)
Current board:
[[2 2 1]
 [0 0 0]
 [0 1 0]]
Current board:
[[2 2 1]
 [0 0 0]
 [0 1 1]]
Your turn! Enter your move as row and column (e.g., 0 1):
Current board:
[[2 2 1]
 [0 0 0]
 [0 1 1]]
Current board:
[[2 2 1]
 [2 0 0]
 [0 1 1]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Agent chooses move: (2, 0)
Current board:
[[2 2 1]
 [2 0 0]
 [0 1 1]]
Final board:
[[2 2 1]
 [2 0 0]
 [1 1 1]]
Agent wins!
