In [1]:
import random
from TicTacToe import TicTacToe

def validBoard(game):
    x_count = game.board.count(1)
    o_count = game.board.count(2)
    
    
    if not (x_count == o_count or x_count == o_count + 1):
        return False
    
    
    if game.check_winner(1) and game.check_winner(2):
        return False
    
    # Check if O wins but X has more or equal moves
    if game.check_winner(2) and x_count != o_count:
        return False
    
    # Check if X wins but O has equal moves
    if game.check_winner(1) and x_count != o_count + 1:
        return False
    
    return True

# Example usage
game = TicTacToe()
game.board = [1, 2, 1, 2, 1, 2, 1, 0, 0]  # Example board state
print(validBoard(game))  # Output: True or False based on the board state


True


In [18]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, learning_rate=0.001, batch_size=32, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995, replay_buffer_size=10000):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.replay_buffer = deque(maxlen=replay_buffer_size)
        
        self.model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

    def select_action(self, state, valid_actions):
        if np.random.rand() < self.epsilon:
            return random.choice(valid_actions)
        q_values = self.model.predict(state.reshape(1, -1), verbose=0)[0]
        q_values = [q_values[i] if i in valid_actions else float('-inf') for i in range(self.action_size)]
        return np.argmax(q_values)

    def store_experience(self, experience):
        self.replay_buffer.append(experience)

    def train_from_experience(self):
        if len(self.replay_buffer) < self.batch_size:
            return
        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        
        states = np.zeros((self.batch_size, self.state_size))
        q_values_batch = np.zeros((self.batch_size, self.action_size))

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            q_values = self.model.predict(state.reshape(1, -1), verbose=0)[0]
            if done:
                q_target = reward
            else:
                next_q_values = self.model.predict(next_state.reshape(1, -1), verbose=0)[0]
                valid_actions = TicTacToe().empty_positions()
                max_next_q = max(next_q_values[a] for a in valid_actions) if valid_actions else 0
                q_target = reward + self.gamma * max_next_q
            
            q_values[action] = q_target
            states[i] = state
            q_values_batch[i] = q_values

        self.model.fit(states, q_values_batch, epochs=1, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def save_model(self, filepath):
        self.model.save(filepath)

def train_sqn_agent(num_episodes=100, smartMovePlayer1_prob=0.0, save_interval=1000, model_path="trained_sqn_model.h5"):
    agent = SQNAgent()
    win_count = 0
    loss_count = 0
    draw_count = 0
    
    for episode in range(num_episodes):
        game = TicTacToe(smartMovePlayer1=smartMovePlayer1_prob)
        state = np.array(game.board)
        done = False

        while not done:
            # Player 2 (agent) move
            valid_actions = game.empty_positions()
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            next_state = np.array(game.board)
            done = game.current_winner is not None or game.is_full()
            reward = game.get_reward() if done else 0
            agent.store_experience((state, action, reward, next_state, done))
            state = next_state
            
            # Check if game is done after agent's move
            if done:
                if reward == 1:
                    win_count += 1
                elif reward == -1:
                    loss_count += 1
                else:
                    draw_count += 1
                break

            # Player 1 (opponent) move
            game.player1_move()
            done = game.current_winner is not None or game.is_full()
            state = np.array(game.board)
            
            # Check if game is done after player 1's move
            if done:
                reward = game.get_reward()
                if reward == 1:
                    loss_count += 1
                elif reward == -1:
                    win_count += 1
                else:
                    draw_count += 1
                break

        # Train agent after each episode
        agent.train_from_experience()
        
        # Print results every 100 episodes
        if (episode + 1) % 10== 0:
            print(f"Episode {episode + 1}: Wins: {win_count}, Losses: {loss_count}, Draws: {draw_count}")
            
        
        # Save model at intervals
        if episode % save_interval == 0:
            agent.save_model(f"{model_path.split('.')[0]}_episode_{episode}.h5")
        
        # Increase smartMovePlayer1 probability every 1000 episodes if needed
        if episode % 1000 == 0 and agent.epsilon <= agent.epsilon_min:
            smartMovePlayer1_prob = min(1.0, smartMovePlayer1_prob + 0.1)

    agent.save_model(model_path)
    print("Training complete and model saved")
    return agent

train_sqn_agent()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 8
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 1
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 2
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 1
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 7
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 5
Player 1 (Smart/Random) chooses position 6
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 4
Player 1 (Smart/Random) chooses position 8
Player 1 (Smart/Random) chooses position 3
Player 1 (Smart/Random) chooses position 9
Player 1 (Smart/Random) chooses position 1
Player 1 (Smart/Random) chooses position 4
Player 1 (S



Episode 100: Wins: 86, Losses: 0, Draws: 14
Training complete and model saved


<__main__.SQNAgent at 0x1696f2510>