In [5]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    
    
    for episode in range(episodes):
        agent.epsilon=min(min_epsilon,agent.epsilon*agent.epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = 0.0
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model1_episode_{episode}.h5')
    agent.model.save('model1.h5')
agent = train_agent()





Episode: 0, Win Rate: 0.00, Epsilon: 0.010, smartmove0.0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.016666666666666666
Wins: 30, Losses: 56, Draws: 15
Episode: 200, Win Rate: 0.29, Epsilon: 0.009, smartmove0.03333333333333333
Wins: 58, Losses: 106, Draws: 37
Episode: 300, Win Rate: 0.33, Epsilon: 0.009, smartmove0.05
Wins: 98, Losses: 155, Draws: 48
Episode: 400, Win Rate: 0.32, Epsilon: 0.008, smartmove0.06666666666666667
Wins: 130, Losses: 210, Draws: 61
Episode: 500, Win Rate: 0.34, Epsilon: 0.008, smartmove0.08333333333333333
Wins: 169, Losses: 257, Draws: 75
Episode: 600, Win Rate: 0.34, Epsilon: 0.007, smartmove0.1
Wins: 202, Losses: 315, Draws: 84
Episode: 700, Win Rate: 0.33, Epsilon: 0.007, smartmove0.11666666666666667
Wins: 232, Losses: 371, Draws: 98
Episode: 800, Win Rate: 0.33, Epsilon: 0.007, smartmove0.13333333333333333
Wins: 268, Losses: 431, Draws: 102
Episode: 900, Win Rate: 0.33, Epsilon: 0.006, smartmove0.15
Wins: 297, Loss



Episode: 1000, Win Rate: 0.32, Epsilon: 0.006, smartmove0.16666666666666666
Wins: 324, Losses: 542, Draws: 135
Episode: 1100, Win Rate: 0.32, Epsilon: 0.006, smartmove0.18333333333333332
Wins: 351, Losses: 600, Draws: 150
Episode: 1200, Win Rate: 0.32, Epsilon: 0.005, smartmove0.2
Wins: 384, Losses: 656, Draws: 161
Episode: 1300, Win Rate: 0.32, Epsilon: 0.005, smartmove0.21666666666666667
Wins: 410, Losses: 709, Draws: 182
Episode: 1400, Win Rate: 0.31, Epsilon: 0.005, smartmove0.23333333333333334
Wins: 428, Losses: 776, Draws: 197
Episode: 1500, Win Rate: 0.30, Epsilon: 0.005, smartmove0.25
Wins: 447, Losses: 837, Draws: 217
Episode: 1600, Win Rate: 0.30, Epsilon: 0.004, smartmove0.26666666666666666
Wins: 478, Losses: 888, Draws: 235
Episode: 1700, Win Rate: 0.30, Epsilon: 0.004, smartmove0.2833333333333333
Wins: 503, Losses: 944, Draws: 254
Episode: 1800, Win Rate: 0.30, Epsilon: 0.004, smartmove0.3
Wins: 533, Losses: 997, Draws: 271
Episode: 1900, Win Rate: 0.29, Epsilon: 0.004, sm



Episode: 2000, Win Rate: 0.28, Epsilon: 0.004, smartmove0.3333333333333333
Wins: 566, Losses: 1134, Draws: 301
Episode: 2100, Win Rate: 0.28, Epsilon: 0.003, smartmove0.35
Wins: 594, Losses: 1193, Draws: 314
Episode: 2200, Win Rate: 0.28, Epsilon: 0.003, smartmove0.36666666666666664
Wins: 617, Losses: 1262, Draws: 322
Episode: 2300, Win Rate: 0.28, Epsilon: 0.003, smartmove0.38333333333333336
Wins: 641, Losses: 1320, Draws: 340
Episode: 2400, Win Rate: 0.28, Epsilon: 0.003, smartmove0.4
Wins: 661, Losses: 1377, Draws: 363
Episode: 2500, Win Rate: 0.28, Epsilon: 0.003, smartmove0.4166666666666667
Wins: 690, Losses: 1433, Draws: 378
Episode: 2600, Win Rate: 0.27, Epsilon: 0.003, smartmove0.43333333333333335
Wins: 710, Losses: 1494, Draws: 397
Episode: 2700, Win Rate: 0.27, Epsilon: 0.003, smartmove0.45
Wins: 728, Losses: 1554, Draws: 419
Episode: 2800, Win Rate: 0.27, Epsilon: 0.002, smartmove0.4666666666666667
Wins: 747, Losses: 1611, Draws: 443
Episode: 2900, Win Rate: 0.27, Epsilon: 0



Episode: 3000, Win Rate: 0.27, Epsilon: 0.002, smartmove0.5
Wins: 799, Losses: 1732, Draws: 470
Episode: 3100, Win Rate: 0.27, Epsilon: 0.002, smartmove0.5166666666666667
Wins: 823, Losses: 1790, Draws: 488
Episode: 3200, Win Rate: 0.26, Epsilon: 0.002, smartmove0.5333333333333333
Wins: 845, Losses: 1849, Draws: 507
Episode: 3300, Win Rate: 0.26, Epsilon: 0.002, smartmove0.55
Wins: 861, Losses: 1901, Draws: 539
Episode: 3400, Win Rate: 0.26, Epsilon: 0.002, smartmove0.5666666666666667
Wins: 879, Losses: 1963, Draws: 559
Episode: 3500, Win Rate: 0.26, Epsilon: 0.002, smartmove0.5833333333333334
Wins: 896, Losses: 2018, Draws: 587
Episode: 3600, Win Rate: 0.25, Epsilon: 0.002, smartmove0.6
Wins: 910, Losses: 2084, Draws: 607
Episode: 3700, Win Rate: 0.25, Epsilon: 0.002, smartmove0.6166666666666667
Wins: 920, Losses: 2144, Draws: 637
Episode: 3800, Win Rate: 0.25, Epsilon: 0.001, smartmove0.6333333333333333
Wins: 940, Losses: 2201, Draws: 660
Episode: 3900, Win Rate: 0.24, Epsilon: 0.001



Episode: 4000, Win Rate: 0.24, Epsilon: 0.001, smartmove0.6666666666666666
Wins: 966, Losses: 2322, Draws: 713
Episode: 4100, Win Rate: 0.24, Epsilon: 0.001, smartmove0.6833333333333333
Wins: 983, Losses: 2378, Draws: 740
Episode: 4200, Win Rate: 0.24, Epsilon: 0.001, smartmove0.7
Wins: 995, Losses: 2438, Draws: 768
Episode: 4300, Win Rate: 0.23, Epsilon: 0.001, smartmove0.7166666666666667
Wins: 1009, Losses: 2495, Draws: 797
Episode: 4400, Win Rate: 0.23, Epsilon: 0.001, smartmove0.7333333333333333
Wins: 1026, Losses: 2543, Draws: 832
Episode: 4500, Win Rate: 0.23, Epsilon: 0.001, smartmove0.75
Wins: 1040, Losses: 2590, Draws: 871
Episode: 4600, Win Rate: 0.23, Epsilon: 0.001, smartmove0.7666666666666667
Wins: 1054, Losses: 2643, Draws: 904
Episode: 4700, Win Rate: 0.23, Epsilon: 0.001, smartmove0.7833333333333333
Wins: 1066, Losses: 2708, Draws: 927
Episode: 4800, Win Rate: 0.22, Epsilon: 0.001, smartmove0.8
Wins: 1079, Losses: 2758, Draws: 964
Episode: 4900, Win Rate: 0.22, Epsilon:



Episode: 5000, Win Rate: 0.22, Epsilon: 0.001, smartmove0.8333333333333334
Wins: 1106, Losses: 2876, Draws: 1019
Episode: 5100, Win Rate: 0.22, Epsilon: 0.001, smartmove0.85
Wins: 1111, Losses: 2937, Draws: 1053
Episode: 5200, Win Rate: 0.22, Epsilon: 0.001, smartmove0.8666666666666667
Wins: 1120, Losses: 3001, Draws: 1080
Episode: 5300, Win Rate: 0.21, Epsilon: 0.001, smartmove0.8833333333333333
Wins: 1133, Losses: 3058, Draws: 1110
Episode: 5400, Win Rate: 0.21, Epsilon: 0.001, smartmove0.9
Wins: 1138, Losses: 3117, Draws: 1146
Episode: 5500, Win Rate: 0.21, Epsilon: 0.001, smartmove0.9166666666666666
Wins: 1149, Losses: 3178, Draws: 1174
Episode: 5600, Win Rate: 0.21, Epsilon: 0.001, smartmove0.9333333333333333
Wins: 1156, Losses: 3243, Draws: 1202
Episode: 5700, Win Rate: 0.20, Epsilon: 0.001, smartmove0.95
Wins: 1162, Losses: 3310, Draws: 1229
Episode: 5800, Win Rate: 0.20, Epsilon: 0.001, smartmove0.9666666666666667
Wins: 1164, Losses: 3382, Draws: 1255
Episode: 5900, Win Rate: 0



Episode: 6000, Win Rate: 0.19, Epsilon: 0.000, smartmove1
Wins: 1168, Losses: 3531, Draws: 1302
Episode: 6100, Win Rate: 0.19, Epsilon: 0.000, smartmove1
Wins: 1173, Losses: 3587, Draws: 1341
Episode: 6200, Win Rate: 0.19, Epsilon: 0.000, smartmove1
Wins: 1174, Losses: 3653, Draws: 1374
Episode: 6300, Win Rate: 0.19, Epsilon: 0.000, smartmove1
Wins: 1179, Losses: 3713, Draws: 1409
Episode: 6400, Win Rate: 0.18, Epsilon: 0.000, smartmove1
Wins: 1183, Losses: 3779, Draws: 1439
Episode: 6500, Win Rate: 0.18, Epsilon: 0.000, smartmove1
Wins: 1188, Losses: 3846, Draws: 1467
Episode: 6600, Win Rate: 0.18, Epsilon: 0.000, smartmove1
Wins: 1193, Losses: 3913, Draws: 1495
Episode: 6700, Win Rate: 0.18, Epsilon: 0.000, smartmove1
Wins: 1196, Losses: 3972, Draws: 1533
Episode: 6800, Win Rate: 0.18, Epsilon: 0.000, smartmove1
Wins: 1201, Losses: 4028, Draws: 1572
Episode: 6900, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1204, Losses: 4095, Draws: 1602




Episode: 7000, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1206, Losses: 4150, Draws: 1645
Episode: 7100, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1210, Losses: 4214, Draws: 1677
Episode: 7200, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1220, Losses: 4273, Draws: 1708
Episode: 7300, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1227, Losses: 4332, Draws: 1742
Episode: 7400, Win Rate: 0.17, Epsilon: 0.000, smartmove1
Wins: 1232, Losses: 4390, Draws: 1779
Episode: 7500, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1234, Losses: 4460, Draws: 1807
Episode: 7600, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1234, Losses: 4540, Draws: 1827
Episode: 7700, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1238, Losses: 4603, Draws: 1860
Episode: 7800, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1242, Losses: 4677, Draws: 1882
Episode: 7900, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1243, Losses: 4746, Draws: 1912




Episode: 8000, Win Rate: 0.16, Epsilon: 0.000, smartmove1
Wins: 1247, Losses: 4810, Draws: 1944
Episode: 8100, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1251, Losses: 4883, Draws: 1967
Episode: 8200, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1254, Losses: 4947, Draws: 2000
Episode: 8300, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1264, Losses: 5013, Draws: 2024
Episode: 8400, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1271, Losses: 5078, Draws: 2052
Episode: 8500, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1279, Losses: 5146, Draws: 2076
Episode: 8600, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1283, Losses: 5211, Draws: 2107
Episode: 8700, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1286, Losses: 5285, Draws: 2130
Episode: 8800, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1290, Losses: 5354, Draws: 2157
Episode: 8900, Win Rate: 0.15, Epsilon: 0.000, smartmove1
Wins: 1293, Losses: 5425, Draws: 2183




Episode: 9000, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1300, Losses: 5498, Draws: 2203
Episode: 9100, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1308, Losses: 5572, Draws: 2221
Episode: 9200, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1315, Losses: 5638, Draws: 2248
Episode: 9300, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1324, Losses: 5706, Draws: 2271
Episode: 9400, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1330, Losses: 5778, Draws: 2293
Episode: 9500, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1336, Losses: 5842, Draws: 2323
Episode: 9600, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1341, Losses: 5913, Draws: 2347
Episode: 9700, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1347, Losses: 5972, Draws: 2382
Episode: 9800, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1351, Losses: 6049, Draws: 2401
Episode: 9900, Win Rate: 0.14, Epsilon: 0.000, smartmove1
Wins: 1356, Losses: 6122, Draws: 2423




In [7]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            agent.epsilon +=0.4
            print("Increasing Epsillion")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = 0.0
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model2_episode_{episode}.h5')
    agent.model.save('model2.h5')
agent = train_agent()





Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove0.0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.29, Epsilon: 0.901, smartmove0.016666666666666666
Wins: 29, Losses: 57, Draws: 15
Episode: 200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.03333333333333333
Wins: 57, Losses: 116, Draws: 28
Episode: 300, Win Rate: 0.28, Epsilon: 0.703, smartmove0.05
Wins: 85, Losses: 180, Draws: 36
Episode: 400, Win Rate: 0.30, Epsilon: 0.604, smartmove0.06666666666666667
Wins: 120, Losses: 238, Draws: 43
Episode: 500, Win Rate: 0.30, Epsilon: 0.505, smartmove0.08333333333333333
Wins: 152, Losses: 297, Draws: 52
Episode: 600, Win Rate: 0.32, Epsilon: 0.406, smartmove0.1
Wins: 192, Losses: 350, Draws: 59
Episode: 700, Win Rate: 0.32, Epsilon: 0.307, smartmove0.11666666666666667
Wins: 226, Losses: 408, Draws: 67
Episode: 800, Win Rate: 0.31, Epsilon: 0.208, smartmove0.13333333333333333
Wins: 252, Losses: 471, Draws: 78
Episode: 900, Win Rate: 0.31, Epsilon: 0.109, smartmove0.15
Wins: 282, Losse



Episode: 1000, Win Rate: 0.32, Epsilon: 0.010, smartmove0.16666666666666666
Wins: 320, Losses: 583, Draws: 98
Episode: 1100, Win Rate: 0.32, Epsilon: 0.010, smartmove0.18333333333333332
Wins: 352, Losses: 643, Draws: 106
Episode: 1200, Win Rate: 0.32, Epsilon: 0.010, smartmove0.2
Wins: 382, Losses: 703, Draws: 116
Episode: 1300, Win Rate: 0.31, Epsilon: 0.010, smartmove0.21666666666666667
Wins: 409, Losses: 765, Draws: 127
Episode: 1400, Win Rate: 0.32, Epsilon: 0.010, smartmove0.23333333333333334
Wins: 444, Losses: 818, Draws: 139
Episode: 1500, Win Rate: 0.32, Epsilon: 0.010, smartmove0.25
Wins: 482, Losses: 874, Draws: 145
Episode: 1600, Win Rate: 0.32, Epsilon: 0.010, smartmove0.26666666666666666
Wins: 515, Losses: 936, Draws: 150
Episode: 1700, Win Rate: 0.32, Epsilon: 0.010, smartmove0.2833333333333333
Wins: 538, Losses: 1001, Draws: 162
Episode: 1800, Win Rate: 0.31, Epsilon: 0.010, smartmove0.3
Wins: 563, Losses: 1064, Draws: 174
Episode: 1900, Win Rate: 0.31, Epsilon: 0.010, s



Increasing Epsillion
Episode: 2000, Win Rate: 0.31, Epsilon: 0.410, smartmove0.3333333333333333
Wins: 625, Losses: 1187, Draws: 189
Episode: 2100, Win Rate: 0.31, Epsilon: 0.901, smartmove0.35
Wins: 651, Losses: 1253, Draws: 197
Episode: 2200, Win Rate: 0.30, Epsilon: 0.802, smartmove0.36666666666666664
Wins: 668, Losses: 1322, Draws: 211
Episode: 2300, Win Rate: 0.30, Epsilon: 0.703, smartmove0.38333333333333336
Wins: 690, Losses: 1387, Draws: 224
Episode: 2400, Win Rate: 0.30, Epsilon: 0.604, smartmove0.4
Wins: 712, Losses: 1458, Draws: 231
Episode: 2500, Win Rate: 0.30, Epsilon: 0.505, smartmove0.4166666666666667
Wins: 739, Losses: 1520, Draws: 242
Episode: 2600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.43333333333333335
Wins: 765, Losses: 1587, Draws: 249
Episode: 2700, Win Rate: 0.29, Epsilon: 0.307, smartmove0.45
Wins: 787, Losses: 1656, Draws: 258
Episode: 2800, Win Rate: 0.29, Epsilon: 0.208, smartmove0.4666666666666667
Wins: 814, Losses: 1722, Draws: 265
Episode: 2900, Win R



Episode: 3000, Win Rate: 0.28, Epsilon: 0.010, smartmove0.5
Wins: 854, Losses: 1862, Draws: 285
Episode: 3100, Win Rate: 0.28, Epsilon: 0.010, smartmove0.5166666666666667
Wins: 873, Losses: 1933, Draws: 295
Episode: 3200, Win Rate: 0.28, Epsilon: 0.010, smartmove0.5333333333333333
Wins: 904, Losses: 1999, Draws: 298
Episode: 3300, Win Rate: 0.28, Epsilon: 0.010, smartmove0.55
Wins: 936, Losses: 2055, Draws: 310
Episode: 3400, Win Rate: 0.28, Epsilon: 0.010, smartmove0.5666666666666667
Wins: 947, Losses: 2129, Draws: 325
Episode: 3500, Win Rate: 0.27, Epsilon: 0.010, smartmove0.5833333333333334
Wins: 961, Losses: 2205, Draws: 335
Episode: 3600, Win Rate: 0.27, Epsilon: 0.010, smartmove0.6
Wins: 986, Losses: 2260, Draws: 355
Episode: 3700, Win Rate: 0.27, Epsilon: 0.010, smartmove0.6166666666666667
Wins: 1013, Losses: 2322, Draws: 366
Episode: 3800, Win Rate: 0.27, Epsilon: 0.010, smartmove0.6333333333333333
Wins: 1034, Losses: 2383, Draws: 384
Episode: 3900, Win Rate: 0.27, Epsilon: 0.0



Increasing Epsillion
Episode: 4000, Win Rate: 0.27, Epsilon: 0.410, smartmove0.6666666666666666
Wins: 1073, Losses: 2514, Draws: 414
Episode: 4100, Win Rate: 0.26, Epsilon: 0.901, smartmove0.6833333333333333
Wins: 1076, Losses: 2598, Draws: 427
Episode: 4200, Win Rate: 0.26, Epsilon: 0.802, smartmove0.7
Wins: 1086, Losses: 2675, Draws: 440
Episode: 4300, Win Rate: 0.25, Epsilon: 0.703, smartmove0.7166666666666667
Wins: 1094, Losses: 2760, Draws: 447
Episode: 4400, Win Rate: 0.25, Epsilon: 0.604, smartmove0.7333333333333333
Wins: 1108, Losses: 2833, Draws: 460
Episode: 4500, Win Rate: 0.25, Epsilon: 0.505, smartmove0.75
Wins: 1120, Losses: 2913, Draws: 468
Episode: 4600, Win Rate: 0.25, Epsilon: 0.406, smartmove0.7666666666666667
Wins: 1128, Losses: 2986, Draws: 487
Episode: 4700, Win Rate: 0.24, Epsilon: 0.307, smartmove0.7833333333333333
Wins: 1140, Losses: 3056, Draws: 505
Episode: 4800, Win Rate: 0.24, Epsilon: 0.208, smartmove0.8
Wins: 1160, Losses: 3114, Draws: 527
Episode: 4900, 



Episode: 5000, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8333333333333334
Wins: 1187, Losses: 3246, Draws: 568
Episode: 5100, Win Rate: 0.24, Epsilon: 0.010, smartmove0.85
Wins: 1203, Losses: 3299, Draws: 599
Episode: 5200, Win Rate: 0.23, Epsilon: 0.010, smartmove0.8666666666666667
Wins: 1218, Losses: 3365, Draws: 618
Episode: 5300, Win Rate: 0.23, Epsilon: 0.010, smartmove0.8833333333333333
Wins: 1234, Losses: 3420, Draws: 647
Episode: 5400, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9
Wins: 1251, Losses: 3475, Draws: 675
Episode: 5500, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9166666666666666
Wins: 1267, Losses: 3519, Draws: 715
Episode: 5600, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9333333333333333
Wins: 1281, Losses: 3575, Draws: 745
Episode: 5700, Win Rate: 0.23, Epsilon: 0.010, smartmove0.95
Wins: 1292, Losses: 3632, Draws: 777
Episode: 5800, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9666666666666667
Wins: 1307, Losses: 3687, Draws: 807
Episode: 5900, Win Rate: 0.22, Epsi



Increasing Epsillion
Episode: 6000, Win Rate: 0.22, Epsilon: 0.410, smartmove1
Wins: 1323, Losses: 3795, Draws: 883
Episode: 6100, Win Rate: 0.22, Epsilon: 0.901, smartmove1
Wins: 1324, Losses: 3881, Draws: 896
Episode: 6200, Win Rate: 0.21, Epsilon: 0.802, smartmove1
Wins: 1325, Losses: 3967, Draws: 909
Episode: 6300, Win Rate: 0.21, Epsilon: 0.703, smartmove1
Wins: 1328, Losses: 4053, Draws: 920
Episode: 6400, Win Rate: 0.21, Epsilon: 0.604, smartmove1
Wins: 1329, Losses: 4138, Draws: 934
Episode: 6500, Win Rate: 0.21, Epsilon: 0.505, smartmove1
Wins: 1333, Losses: 4216, Draws: 952
Episode: 6600, Win Rate: 0.20, Epsilon: 0.406, smartmove1
Wins: 1336, Losses: 4291, Draws: 974
Episode: 6700, Win Rate: 0.20, Epsilon: 0.307, smartmove1
Wins: 1337, Losses: 4369, Draws: 995
Episode: 6800, Win Rate: 0.20, Epsilon: 0.208, smartmove1
Wins: 1344, Losses: 4436, Draws: 1021
Episode: 6900, Win Rate: 0.20, Epsilon: 0.109, smartmove1
Wins: 1348, Losses: 4497, Draws: 1056




Episode: 7000, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1352, Losses: 4557, Draws: 1092
Episode: 7100, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1360, Losses: 4604, Draws: 1137
Episode: 7200, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1364, Losses: 4655, Draws: 1182
Episode: 7300, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1371, Losses: 4698, Draws: 1232
Episode: 7400, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1378, Losses: 4749, Draws: 1274
Episode: 7500, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1382, Losses: 4789, Draws: 1330
Episode: 7600, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1388, Losses: 4828, Draws: 1385
Episode: 7700, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1397, Losses: 4876, Draws: 1428
Episode: 7800, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1405, Losses: 4921, Draws: 1475
Episode: 7900, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1409, Losses: 4961, Draws: 1531




Increasing Epsillion
Episode: 8000, Win Rate: 0.18, Epsilon: 0.410, smartmove1
Wins: 1412, Losses: 5006, Draws: 1583
Episode: 8100, Win Rate: 0.17, Epsilon: 0.901, smartmove1
Wins: 1413, Losses: 5095, Draws: 1593
Episode: 8200, Win Rate: 0.17, Epsilon: 0.802, smartmove1
Wins: 1413, Losses: 5182, Draws: 1606
Episode: 8300, Win Rate: 0.17, Epsilon: 0.703, smartmove1
Wins: 1415, Losses: 5263, Draws: 1623
Episode: 8400, Win Rate: 0.17, Epsilon: 0.604, smartmove1
Wins: 1415, Losses: 5350, Draws: 1636
Episode: 8500, Win Rate: 0.17, Epsilon: 0.505, smartmove1
Wins: 1416, Losses: 5436, Draws: 1649
Episode: 8600, Win Rate: 0.16, Epsilon: 0.406, smartmove1
Wins: 1418, Losses: 5512, Draws: 1671
Episode: 8700, Win Rate: 0.16, Epsilon: 0.307, smartmove1
Wins: 1421, Losses: 5574, Draws: 1706
Episode: 8800, Win Rate: 0.16, Epsilon: 0.208, smartmove1
Wins: 1423, Losses: 5633, Draws: 1745
Episode: 8900, Win Rate: 0.16, Epsilon: 0.109, smartmove1
Wins: 1429, Losses: 5686, Draws: 1786




Episode: 9000, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1433, Losses: 5728, Draws: 1840
Episode: 9100, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1439, Losses: 5756, Draws: 1906
Episode: 9200, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1445, Losses: 5799, Draws: 1957
Episode: 9300, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1453, Losses: 5836, Draws: 2012
Episode: 9400, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1459, Losses: 5868, Draws: 2074
Episode: 9500, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1464, Losses: 5909, Draws: 2128
Episode: 9600, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1469, Losses: 5948, Draws: 2184
Episode: 9700, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1477, Losses: 5973, Draws: 2251
Episode: 9800, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1482, Losses: 6005, Draws: 2314
Episode: 9900, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1492, Losses: 6042, Draws: 2367




In [8]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = 0.0
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model3_episode_{episode}.h5')
    agent.model.save('model3.h5')
agent = train_agent()





Episode: 0, Win Rate: 1.00, Epsilon: 1.000, smartmove0.0
Wins: 1, Losses: 0, Draws: 0
Episode: 100, Win Rate: 0.26, Epsilon: 0.901, smartmove0.016666666666666666
Wins: 26, Losses: 59, Draws: 16
Episode: 200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.03333333333333333
Wins: 54, Losses: 120, Draws: 27
Episode: 300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.05
Wins: 82, Losses: 179, Draws: 40
Episode: 400, Win Rate: 0.29, Epsilon: 0.604, smartmove0.06666666666666667
Wins: 115, Losses: 239, Draws: 47
Episode: 500, Win Rate: 0.29, Epsilon: 0.505, smartmove0.08333333333333333
Wins: 146, Losses: 296, Draws: 59
Episode: 600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.1
Wins: 177, Losses: 358, Draws: 66
Episode: 700, Win Rate: 0.30, Epsilon: 0.307, smartmove0.11666666666666667
Wins: 207, Losses: 422, Draws: 72
Episode: 800, Win Rate: 0.30, Epsilon: 0.208, smartmove0.13333333333333333
Wins: 244, Losses: 477, Draws: 80
Episode: 900, Win Rate: 0.31, Epsilon: 0.109, smartmove0.15
Wins: 283, Losse



Episode: 1000, Win Rate: 0.32, Epsilon: 0.010, smartmove0.16666666666666666
Wins: 325, Losses: 581, Draws: 95
Episode: 1100, Win Rate: 0.33, Epsilon: 0.010, smartmove0.18333333333333332
Wins: 364, Losses: 632, Draws: 105
Episode: 1200, Win Rate: 0.33, Epsilon: 0.010, smartmove0.2
Wins: 398, Losses: 691, Draws: 112
Episode: 1300, Win Rate: 0.33, Epsilon: 0.010, smartmove0.21666666666666667
Wins: 430, Losses: 754, Draws: 117
Episode: 1400, Win Rate: 0.33, Epsilon: 0.010, smartmove0.23333333333333334
Wins: 459, Losses: 815, Draws: 127
Episode: 1500, Win Rate: 0.33, Epsilon: 0.010, smartmove0.25
Wins: 495, Losses: 865, Draws: 141
Episode: 1600, Win Rate: 0.33, Epsilon: 0.010, smartmove0.26666666666666666
Wins: 524, Losses: 924, Draws: 153
Episode: 1700, Win Rate: 0.33, Epsilon: 0.010, smartmove0.2833333333333333
Wins: 564, Losses: 973, Draws: 164
Episode: 1800, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3
Wins: 603, Losses: 1023, Draws: 175
Episode: 1900, Win Rate: 0.34, Epsilon: 0.010, sm



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.34, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 673, Losses: 1139, Draws: 189
Episode: 2100, Win Rate: 0.33, Epsilon: 0.901, smartmove0.35
Wins: 698, Losses: 1210, Draws: 193
Episode: 2200, Win Rate: 0.33, Epsilon: 0.802, smartmove0.36666666666666664
Wins: 716, Losses: 1276, Draws: 209
Episode: 2300, Win Rate: 0.32, Epsilon: 0.703, smartmove0.38333333333333336
Wins: 730, Losses: 1351, Draws: 220
Episode: 2400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.4
Wins: 746, Losses: 1426, Draws: 229
Episode: 2500, Win Rate: 0.31, Epsilon: 0.505, smartmove0.4166666666666667
Wins: 772, Losses: 1488, Draws: 241
Episode: 2600, Win Rate: 0.31, Epsilon: 0.406, smartmove0.43333333333333335
Wins: 797, Losses: 1557, Draws: 247
Episode: 2700, Win Rate: 0.30, Epsilon: 0.307, smartmove0.45
Wins: 819, Losses: 1630, Draws: 252
Episode: 2800, Win Rate: 0.30, Epsilon: 0.208, smartmove0.4666666666666667
Wins: 846, Losses: 1698, Draws: 257
Episode: 2900, W



Episode: 3000, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5
Wins: 901, Losses: 1822, Draws: 278
Episode: 3100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5166666666666667
Wins: 916, Losses: 1899, Draws: 286
Episode: 3200, Win Rate: 0.29, Epsilon: 0.010, smartmove0.5333333333333333
Wins: 942, Losses: 1962, Draws: 297
Episode: 3300, Win Rate: 0.29, Epsilon: 0.010, smartmove0.55
Wins: 967, Losses: 2028, Draws: 306
Episode: 3400, Win Rate: 0.29, Epsilon: 0.010, smartmove0.5666666666666667
Wins: 998, Losses: 2087, Draws: 316
Episode: 3500, Win Rate: 0.29, Epsilon: 0.010, smartmove0.5833333333333334
Wins: 1013, Losses: 2157, Draws: 331
Episode: 3600, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6
Wins: 1027, Losses: 2231, Draws: 343
Episode: 3700, Win Rate: 0.28, Epsilon: 0.010, smartmove0.6166666666666667
Wins: 1048, Losses: 2303, Draws: 350
Episode: 3800, Win Rate: 0.28, Epsilon: 0.010, smartmove0.6333333333333333
Wins: 1072, Losses: 2368, Draws: 361
Episode: 3900, Win Rate: 0.28, Epsilon: 0



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 1106, Losses: 2501, Draws: 394
Episode: 4100, Win Rate: 0.27, Epsilon: 0.901, smartmove0.6833333333333333
Wins: 1117, Losses: 2582, Draws: 402
Episode: 4200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.7
Wins: 1129, Losses: 2663, Draws: 409
Episode: 4300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.7166666666666667
Wins: 1142, Losses: 2739, Draws: 420
Episode: 4400, Win Rate: 0.26, Epsilon: 0.604, smartmove0.7333333333333333
Wins: 1155, Losses: 2808, Draws: 438
Episode: 4500, Win Rate: 0.26, Epsilon: 0.505, smartmove0.75
Wins: 1164, Losses: 2892, Draws: 445
Episode: 4600, Win Rate: 0.26, Epsilon: 0.406, smartmove0.7666666666666667
Wins: 1174, Losses: 2973, Draws: 454
Episode: 4700, Win Rate: 0.25, Epsilon: 0.307, smartmove0.7833333333333333
Wins: 1180, Losses: 3051, Draws: 470
Episode: 4800, Win Rate: 0.25, Epsilon: 0.208, smartmove0.8
Wins: 1187, Losses: 3127, Draws: 487
Episode: 49



Episode: 5000, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8333333333333334
Wins: 1210, Losses: 3270, Draws: 521
Episode: 5100, Win Rate: 0.24, Epsilon: 0.010, smartmove0.85
Wins: 1224, Losses: 3332, Draws: 545
Episode: 5200, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8666666666666667
Wins: 1241, Losses: 3390, Draws: 570
Episode: 5300, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8833333333333333
Wins: 1253, Losses: 3462, Draws: 586
Episode: 5400, Win Rate: 0.24, Epsilon: 0.010, smartmove0.9
Wins: 1270, Losses: 3518, Draws: 613
Episode: 5500, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9166666666666666
Wins: 1287, Losses: 3568, Draws: 646
Episode: 5600, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9333333333333333
Wins: 1291, Losses: 3644, Draws: 666
Episode: 5700, Win Rate: 0.23, Epsilon: 0.010, smartmove0.95
Wins: 1296, Losses: 3714, Draws: 691
Episode: 5800, Win Rate: 0.22, Epsilon: 0.010, smartmove0.9666666666666667
Wins: 1299, Losses: 3780, Draws: 722
Episode: 5900, Win Rate: 0.22, Epsi



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.22, Epsilon: 1.000, smartmove1
Wins: 1305, Losses: 3907, Draws: 789
Episode: 6100, Win Rate: 0.21, Epsilon: 0.901, smartmove1
Wins: 1305, Losses: 3998, Draws: 798
Episode: 6200, Win Rate: 0.21, Epsilon: 0.802, smartmove1
Wins: 1308, Losses: 4080, Draws: 813
Episode: 6300, Win Rate: 0.21, Epsilon: 0.703, smartmove1
Wins: 1310, Losses: 4168, Draws: 823
Episode: 6400, Win Rate: 0.21, Epsilon: 0.604, smartmove1
Wins: 1314, Losses: 4247, Draws: 840
Episode: 6500, Win Rate: 0.20, Epsilon: 0.505, smartmove1
Wins: 1316, Losses: 4329, Draws: 856
Episode: 6600, Win Rate: 0.20, Epsilon: 0.406, smartmove1
Wins: 1317, Losses: 4402, Draws: 882
Episode: 6700, Win Rate: 0.20, Epsilon: 0.307, smartmove1
Wins: 1322, Losses: 4466, Draws: 913
Episode: 6800, Win Rate: 0.19, Epsilon: 0.208, smartmove1
Wins: 1325, Losses: 4534, Draws: 942
Episode: 6900, Win Rate: 0.19, Epsilon: 0.109, smartmove1
Wins: 1326, Losses: 4608, Draws: 967




Episode: 7000, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1329, Losses: 4664, Draws: 1008
Episode: 7100, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1331, Losses: 4712, Draws: 1058
Episode: 7200, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1334, Losses: 4761, Draws: 1106
Episode: 7300, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1342, Losses: 4816, Draws: 1143
Episode: 7400, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1349, Losses: 4853, Draws: 1199
Episode: 7500, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1354, Losses: 4900, Draws: 1247
Episode: 7600, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1359, Losses: 4939, Draws: 1303
Episode: 7700, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1360, Losses: 4977, Draws: 1364
Episode: 7800, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1364, Losses: 5016, Draws: 1421
Episode: 7900, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1367, Losses: 5061, Draws: 1473




Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.17, Epsilon: 1.000, smartmove1
Wins: 1373, Losses: 5101, Draws: 1527
Episode: 8100, Win Rate: 0.17, Epsilon: 0.901, smartmove1
Wins: 1375, Losses: 5192, Draws: 1534
Episode: 8200, Win Rate: 0.17, Epsilon: 0.802, smartmove1
Wins: 1377, Losses: 5275, Draws: 1549
Episode: 8300, Win Rate: 0.17, Epsilon: 0.703, smartmove1
Wins: 1380, Losses: 5357, Draws: 1564
Episode: 8400, Win Rate: 0.16, Epsilon: 0.604, smartmove1
Wins: 1382, Losses: 5439, Draws: 1580
Episode: 8500, Win Rate: 0.16, Epsilon: 0.505, smartmove1
Wins: 1382, Losses: 5520, Draws: 1599
Episode: 8600, Win Rate: 0.16, Epsilon: 0.406, smartmove1
Wins: 1385, Losses: 5590, Draws: 1626
Episode: 8700, Win Rate: 0.16, Epsilon: 0.307, smartmove1
Wins: 1388, Losses: 5653, Draws: 1660
Episode: 8800, Win Rate: 0.16, Epsilon: 0.208, smartmove1
Wins: 1392, Losses: 5712, Draws: 1697
Episode: 8900, Win Rate: 0.16, Epsilon: 0.109, smartmove1
Wins: 1398, Losses: 5752, Draws: 1751




Episode: 9000, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1405, Losses: 5788, Draws: 1808
Episode: 9100, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1416, Losses: 5818, Draws: 1867
Episode: 9200, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1420, Losses: 5849, Draws: 1932
Episode: 9300, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1430, Losses: 5874, Draws: 1997
Episode: 9400, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1437, Losses: 5899, Draws: 2065
Episode: 9500, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1441, Losses: 5922, Draws: 2138
Episode: 9600, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1443, Losses: 5949, Draws: 2209
Episode: 9700, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1447, Losses: 5978, Draws: 2276
Episode: 9800, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1455, Losses: 6004, Draws: 2342
Episode: 9900, Win Rate: 0.15, Epsilon: 0.010, smartmove1
Wins: 1461, Losses: 6030, Draws: 2410




In [9]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 10.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -10.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = 0.0
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model4_episode_{episode}.h5')
    agent.model.save('model4.h5')
agent = train_agent()





Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove0.0
Wins: 0, Losses: 0, Draws: 1
Episode: 100, Win Rate: 0.35, Epsilon: 0.901, smartmove0.016666666666666666
Wins: 35, Losses: 54, Draws: 12
Episode: 200, Win Rate: 0.32, Epsilon: 0.802, smartmove0.03333333333333333
Wins: 64, Losses: 114, Draws: 23
Episode: 300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.05
Wins: 92, Losses: 173, Draws: 36
Episode: 400, Win Rate: 0.34, Epsilon: 0.604, smartmove0.06666666666666667
Wins: 136, Losses: 222, Draws: 43
Episode: 500, Win Rate: 0.34, Epsilon: 0.505, smartmove0.08333333333333333
Wins: 169, Losses: 277, Draws: 55
Episode: 600, Win Rate: 0.33, Epsilon: 0.406, smartmove0.1
Wins: 200, Losses: 339, Draws: 62
Episode: 700, Win Rate: 0.34, Epsilon: 0.307, smartmove0.11666666666666667
Wins: 236, Losses: 393, Draws: 72
Episode: 800, Win Rate: 0.35, Epsilon: 0.208, smartmove0.13333333333333333
Wins: 278, Losses: 442, Draws: 81
Episode: 900, Win Rate: 0.35, Epsilon: 0.109, smartmove0.15
Wins: 318, Losse



Episode: 1000, Win Rate: 0.37, Epsilon: 0.010, smartmove0.16666666666666666
Wins: 369, Losses: 543, Draws: 89
Episode: 1100, Win Rate: 0.37, Epsilon: 0.010, smartmove0.18333333333333332
Wins: 408, Losses: 601, Draws: 92
Episode: 1200, Win Rate: 0.38, Epsilon: 0.010, smartmove0.2
Wins: 457, Losses: 644, Draws: 100
Episode: 1300, Win Rate: 0.39, Epsilon: 0.010, smartmove0.21666666666666667
Wins: 505, Losses: 690, Draws: 106
Episode: 1400, Win Rate: 0.38, Epsilon: 0.010, smartmove0.23333333333333334
Wins: 534, Losses: 755, Draws: 112
Episode: 1500, Win Rate: 0.37, Epsilon: 0.010, smartmove0.25
Wins: 562, Losses: 817, Draws: 122
Episode: 1600, Win Rate: 0.37, Epsilon: 0.010, smartmove0.26666666666666666
Wins: 598, Losses: 872, Draws: 131
Episode: 1700, Win Rate: 0.37, Epsilon: 0.010, smartmove0.2833333333333333
Wins: 629, Losses: 935, Draws: 137
Episode: 1800, Win Rate: 0.36, Epsilon: 0.010, smartmove0.3
Wins: 653, Losses: 998, Draws: 150
Episode: 1900, Win Rate: 0.36, Epsilon: 0.010, smar



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.36, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 718, Losses: 1121, Draws: 162
Episode: 2100, Win Rate: 0.35, Epsilon: 0.901, smartmove0.35
Wins: 735, Losses: 1192, Draws: 174
Episode: 2200, Win Rate: 0.34, Epsilon: 0.802, smartmove0.36666666666666664
Wins: 753, Losses: 1264, Draws: 184
Episode: 2300, Win Rate: 0.34, Epsilon: 0.703, smartmove0.38333333333333336
Wins: 774, Losses: 1335, Draws: 192
Episode: 2400, Win Rate: 0.33, Epsilon: 0.604, smartmove0.4
Wins: 786, Losses: 1412, Draws: 203
Episode: 2500, Win Rate: 0.32, Epsilon: 0.505, smartmove0.4166666666666667
Wins: 801, Losses: 1487, Draws: 213
Episode: 2600, Win Rate: 0.32, Epsilon: 0.406, smartmove0.43333333333333335
Wins: 826, Losses: 1557, Draws: 218
Episode: 2700, Win Rate: 0.31, Epsilon: 0.307, smartmove0.45
Wins: 842, Losses: 1635, Draws: 224
Episode: 2800, Win Rate: 0.30, Epsilon: 0.208, smartmove0.4666666666666667
Wins: 854, Losses: 1714, Draws: 233
Episode: 2900, W



Episode: 3000, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5
Wins: 896, Losses: 1855, Draws: 250
Episode: 3100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5166666666666667
Wins: 921, Losses: 1923, Draws: 257
Episode: 3200, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5333333333333333
Wins: 945, Losses: 1989, Draws: 267
Episode: 3300, Win Rate: 0.29, Epsilon: 0.010, smartmove0.55
Wins: 967, Losses: 2055, Draws: 279
Episode: 3400, Win Rate: 0.29, Epsilon: 0.010, smartmove0.5666666666666667
Wins: 987, Losses: 2129, Draws: 285
Episode: 3500, Win Rate: 0.29, Epsilon: 0.010, smartmove0.5833333333333334
Wins: 1003, Losses: 2200, Draws: 298
Episode: 3600, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6
Wins: 1031, Losses: 2262, Draws: 308
Episode: 3700, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6166666666666667
Wins: 1061, Losses: 2324, Draws: 316
Episode: 3800, Win Rate: 0.28, Epsilon: 0.010, smartmove0.6333333333333333
Wins: 1081, Losses: 2390, Draws: 330
Episode: 3900, Win Rate: 0.28, Epsilon: 0



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 1128, Losses: 2526, Draws: 347
Episode: 4100, Win Rate: 0.28, Epsilon: 0.901, smartmove0.6833333333333333
Wins: 1137, Losses: 2614, Draws: 350
Episode: 4200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.7
Wins: 1148, Losses: 2698, Draws: 355
Episode: 4300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.7166666666666667
Wins: 1156, Losses: 2782, Draws: 363
Episode: 4400, Win Rate: 0.27, Epsilon: 0.604, smartmove0.7333333333333333
Wins: 1169, Losses: 2856, Draws: 376
Episode: 4500, Win Rate: 0.26, Epsilon: 0.505, smartmove0.75
Wins: 1181, Losses: 2933, Draws: 387
Episode: 4600, Win Rate: 0.26, Epsilon: 0.406, smartmove0.7666666666666667
Wins: 1191, Losses: 3011, Draws: 399
Episode: 4700, Win Rate: 0.26, Epsilon: 0.307, smartmove0.7833333333333333
Wins: 1208, Losses: 3082, Draws: 411
Episode: 4800, Win Rate: 0.25, Epsilon: 0.208, smartmove0.8
Wins: 1221, Losses: 3163, Draws: 417
Episode: 49



Episode: 5000, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8333333333333334
Wins: 1250, Losses: 3311, Draws: 440
Episode: 5100, Win Rate: 0.25, Epsilon: 0.010, smartmove0.85
Wins: 1259, Losses: 3392, Draws: 450
Episode: 5200, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8666666666666667
Wins: 1273, Losses: 3469, Draws: 459
Episode: 5300, Win Rate: 0.24, Epsilon: 0.010, smartmove0.8833333333333333
Wins: 1294, Losses: 3535, Draws: 472
Episode: 5400, Win Rate: 0.24, Epsilon: 0.010, smartmove0.9
Wins: 1307, Losses: 3610, Draws: 484
Episode: 5500, Win Rate: 0.24, Epsilon: 0.010, smartmove0.9166666666666666
Wins: 1320, Losses: 3674, Draws: 507
Episode: 5600, Win Rate: 0.24, Epsilon: 0.010, smartmove0.9333333333333333
Wins: 1327, Losses: 3744, Draws: 530
Episode: 5700, Win Rate: 0.23, Epsilon: 0.010, smartmove0.95
Wins: 1334, Losses: 3822, Draws: 545
Episode: 5800, Win Rate: 0.23, Epsilon: 0.010, smartmove0.9666666666666667
Wins: 1345, Losses: 3889, Draws: 567
Episode: 5900, Win Rate: 0.23, Epsi



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.23, Epsilon: 1.000, smartmove1
Wins: 1360, Losses: 4027, Draws: 614
Episode: 6100, Win Rate: 0.22, Epsilon: 0.901, smartmove1
Wins: 1361, Losses: 4116, Draws: 624
Episode: 6200, Win Rate: 0.22, Epsilon: 0.802, smartmove1
Wins: 1362, Losses: 4209, Draws: 630
Episode: 6300, Win Rate: 0.22, Epsilon: 0.703, smartmove1
Wins: 1364, Losses: 4296, Draws: 641
Episode: 6400, Win Rate: 0.21, Epsilon: 0.604, smartmove1
Wins: 1365, Losses: 4379, Draws: 657
Episode: 6500, Win Rate: 0.21, Epsilon: 0.505, smartmove1
Wins: 1370, Losses: 4457, Draws: 674
Episode: 6600, Win Rate: 0.21, Epsilon: 0.406, smartmove1
Wins: 1373, Losses: 4539, Draws: 689
Episode: 6700, Win Rate: 0.21, Epsilon: 0.307, smartmove1
Wins: 1376, Losses: 4618, Draws: 707
Episode: 6800, Win Rate: 0.20, Epsilon: 0.208, smartmove1
Wins: 1379, Losses: 4697, Draws: 725
Episode: 6900, Win Rate: 0.20, Epsilon: 0.109, smartmove1
Wins: 1384, Losses: 4770, Draws: 747




Episode: 7000, Win Rate: 0.20, Epsilon: 0.010, smartmove1
Wins: 1390, Losses: 4843, Draws: 768
Episode: 7100, Win Rate: 0.20, Epsilon: 0.010, smartmove1
Wins: 1399, Losses: 4904, Draws: 798
Episode: 7200, Win Rate: 0.20, Epsilon: 0.010, smartmove1
Wins: 1409, Losses: 4963, Draws: 829
Episode: 7300, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1412, Losses: 5027, Draws: 862
Episode: 7400, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1417, Losses: 5084, Draws: 900
Episode: 7500, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1428, Losses: 5142, Draws: 931
Episode: 7600, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1437, Losses: 5190, Draws: 974
Episode: 7700, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1443, Losses: 5250, Draws: 1008
Episode: 7800, Win Rate: 0.19, Epsilon: 0.010, smartmove1
Wins: 1449, Losses: 5299, Draws: 1053
Episode: 7900, Win Rate: 0.18, Epsilon: 0.010, smartmove1
Wins: 1459, Losses: 5350, Draws: 1092




Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.18, Epsilon: 1.000, smartmove1
Wins: 1466, Losses: 5401, Draws: 1134
Episode: 8100, Win Rate: 0.18, Epsilon: 0.901, smartmove1
Wins: 1467, Losses: 5493, Draws: 1141
Episode: 8200, Win Rate: 0.18, Epsilon: 0.802, smartmove1
Wins: 1467, Losses: 5589, Draws: 1145
Episode: 8300, Win Rate: 0.18, Epsilon: 0.703, smartmove1
Wins: 1468, Losses: 5682, Draws: 1151
Episode: 8400, Win Rate: 0.17, Epsilon: 0.604, smartmove1
Wins: 1469, Losses: 5767, Draws: 1165
Episode: 8500, Win Rate: 0.17, Epsilon: 0.505, smartmove1
Wins: 1472, Losses: 5850, Draws: 1179
Episode: 8600, Win Rate: 0.17, Epsilon: 0.406, smartmove1
Wins: 1475, Losses: 5924, Draws: 1202
Episode: 8700, Win Rate: 0.17, Epsilon: 0.307, smartmove1
Wins: 1478, Losses: 6004, Draws: 1219
Episode: 8800, Win Rate: 0.17, Epsilon: 0.208, smartmove1
Wins: 1487, Losses: 6068, Draws: 1246
Episode: 8900, Win Rate: 0.17, Epsilon: 0.109, smartmove1
Wins: 1494, Losses: 6123, Draws: 1284




Episode: 9000, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1503, Losses: 6172, Draws: 1326
Episode: 9100, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1516, Losses: 6219, Draws: 1366
Episode: 9200, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1527, Losses: 6259, Draws: 1415
Episode: 9300, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1537, Losses: 6299, Draws: 1465
Episode: 9400, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1549, Losses: 6343, Draws: 1509
Episode: 9500, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1557, Losses: 6395, Draws: 1549
Episode: 9600, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1565, Losses: 6436, Draws: 1600
Episode: 9700, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1575, Losses: 6473, Draws: 1653
Episode: 9800, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1589, Losses: 6520, Draws: 1692
Episode: 9900, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1599, Losses: 6570, Draws: 1732




In [10]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = -0.5
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model5_episode_{episode}.h5')
    agent.model.save('model5.h5')
agent = train_agent()



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 0, Win Rate: 1.00, Epsilon: 1.000, smartmove0.0
Wins: 1, Losses: 0, Draws: 0
Episode: 100, Win Rate: 0.32, Epsilon: 0.901, smartmove0.016666666666666666
Wins: 32, Losses: 59, Draws: 10
Episode: 200, Win Rate: 0.32, Epsilon: 0.802, smartmove0.03333333333333333
Wins: 65, Losses: 118, Draws: 18
Episode: 300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.05
Wins: 93, Losses: 180, Draws: 28
Episode: 400, Win Rate: 0.30, Epsilon: 0.604, smartmove0.06666666666666667
Wins: 120, Losses: 243, Draws: 38
Episode: 500, Win Rate: 0.29, Epsilon: 0.505, smartmove0.08333333333333333
Wins: 145, Losses: 310, Draws: 46
Episode: 600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.1
Wins: 175, Losses: 370, Draws: 56
Episode: 700, Win Rate: 0.29, Epsilon: 0.307, smartmove0.11666666666666667
Wins: 202, Losses: 435, Draws: 64
Episode: 800, Win Rate: 0.29, Epsilon: 0.208, smartmove0.13333333333333333
Wins: 231, Losses: 500, Draws: 70
Episode: 900, Win Rate: 0.30, Epsilon: 0.109, smartmove0.15
Wins: 266, Losse



Episode: 1000, Win Rate: 0.30, Epsilon: 0.010, smartmove0.16666666666666666
Wins: 299, Losses: 622, Draws: 80
Episode: 1100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.18333333333333332
Wins: 328, Losses: 688, Draws: 85
Episode: 1200, Win Rate: 0.30, Epsilon: 0.010, smartmove0.2
Wins: 362, Losses: 750, Draws: 89
Episode: 1300, Win Rate: 0.30, Epsilon: 0.010, smartmove0.21666666666666667
Wins: 391, Losses: 817, Draws: 93
Episode: 1400, Win Rate: 0.30, Epsilon: 0.010, smartmove0.23333333333333334
Wins: 422, Losses: 884, Draws: 95
Episode: 1500, Win Rate: 0.29, Epsilon: 0.010, smartmove0.25
Wins: 439, Losses: 953, Draws: 109
Episode: 1600, Win Rate: 0.29, Epsilon: 0.010, smartmove0.26666666666666666
Wins: 463, Losses: 1021, Draws: 117
Episode: 1700, Win Rate: 0.29, Epsilon: 0.010, smartmove0.2833333333333333
Wins: 489, Losses: 1083, Draws: 129
Episode: 1800, Win Rate: 0.29, Epsilon: 0.010, smartmove0.3
Wins: 515, Losses: 1149, Draws: 137
Episode: 1900, Win Rate: 0.28, Epsilon: 0.010, smar



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 567, Losses: 1281, Draws: 153
Episode: 2100, Win Rate: 0.28, Epsilon: 0.901, smartmove0.35
Wins: 588, Losses: 1347, Draws: 166
Episode: 2200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.36666666666666664
Wins: 610, Losses: 1414, Draws: 177
Episode: 2300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.38333333333333336
Wins: 630, Losses: 1489, Draws: 182
Episode: 2400, Win Rate: 0.27, Epsilon: 0.604, smartmove0.4
Wins: 648, Losses: 1558, Draws: 195
Episode: 2500, Win Rate: 0.27, Epsilon: 0.505, smartmove0.4166666666666667
Wins: 667, Losses: 1629, Draws: 205
Episode: 2600, Win Rate: 0.26, Epsilon: 0.406, smartmove0.43333333333333335
Wins: 689, Losses: 1700, Draws: 212
Episode: 2700, Win Rate: 0.26, Epsilon: 0.307, smartmove0.45
Wins: 710, Losses: 1768, Draws: 223
Episode: 2800, Win Rate: 0.26, Epsilon: 0.208, smartmove0.4666666666666667
Wins: 728, Losses: 1839, Draws: 234
Episode: 2900, W



Episode: 3000, Win Rate: 0.26, Epsilon: 0.010, smartmove0.5
Wins: 768, Losses: 1985, Draws: 248
Episode: 3100, Win Rate: 0.25, Epsilon: 0.010, smartmove0.5166666666666667
Wins: 785, Losses: 2062, Draws: 254
Episode: 3200, Win Rate: 0.25, Epsilon: 0.010, smartmove0.5333333333333333
Wins: 811, Losses: 2130, Draws: 260
Episode: 3300, Win Rate: 0.25, Epsilon: 0.010, smartmove0.55
Wins: 834, Losses: 2201, Draws: 266
Episode: 3400, Win Rate: 0.25, Epsilon: 0.010, smartmove0.5666666666666667
Wins: 852, Losses: 2270, Draws: 279
Episode: 3500, Win Rate: 0.25, Epsilon: 0.010, smartmove0.5833333333333334
Wins: 878, Losses: 2335, Draws: 288
Episode: 3600, Win Rate: 0.25, Epsilon: 0.010, smartmove0.6
Wins: 897, Losses: 2408, Draws: 296
Episode: 3700, Win Rate: 0.25, Epsilon: 0.010, smartmove0.6166666666666667
Wins: 919, Losses: 2471, Draws: 311
Episode: 3800, Win Rate: 0.24, Epsilon: 0.010, smartmove0.6333333333333333
Wins: 926, Losses: 2549, Draws: 326
Episode: 3900, Win Rate: 0.24, Epsilon: 0.010



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.24, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 961, Losses: 2697, Draws: 343
Episode: 4100, Win Rate: 0.24, Epsilon: 0.901, smartmove0.6833333333333333
Wins: 967, Losses: 2782, Draws: 352
Episode: 4200, Win Rate: 0.23, Epsilon: 0.802, smartmove0.7
Wins: 977, Losses: 2865, Draws: 359
Episode: 4300, Win Rate: 0.23, Epsilon: 0.703, smartmove0.7166666666666667
Wins: 989, Losses: 2944, Draws: 368
Episode: 4400, Win Rate: 0.23, Epsilon: 0.604, smartmove0.7333333333333333
Wins: 998, Losses: 3022, Draws: 381
Episode: 4500, Win Rate: 0.23, Epsilon: 0.505, smartmove0.75
Wins: 1014, Losses: 3097, Draws: 390
Episode: 4600, Win Rate: 0.22, Epsilon: 0.406, smartmove0.7666666666666667
Wins: 1022, Losses: 3176, Draws: 403
Episode: 4700, Win Rate: 0.22, Epsilon: 0.307, smartmove0.7833333333333333
Wins: 1032, Losses: 3259, Draws: 410
Episode: 4800, Win Rate: 0.22, Epsilon: 0.208, smartmove0.8
Wins: 1042, Losses: 3339, Draws: 420
Episode: 4900, W



Episode: 5000, Win Rate: 0.21, Epsilon: 0.010, smartmove0.8333333333333334
Wins: 1060, Losses: 3485, Draws: 456
Episode: 5100, Win Rate: 0.21, Epsilon: 0.010, smartmove0.85
Wins: 1070, Losses: 3553, Draws: 478
Episode: 5200, Win Rate: 0.21, Epsilon: 0.010, smartmove0.8666666666666667
Wins: 1092, Losses: 3616, Draws: 493
Episode: 5300, Win Rate: 0.21, Epsilon: 0.010, smartmove0.8833333333333333
Wins: 1105, Losses: 3684, Draws: 512
Episode: 5400, Win Rate: 0.21, Epsilon: 0.010, smartmove0.9
Wins: 1121, Losses: 3750, Draws: 530
Episode: 5500, Win Rate: 0.21, Epsilon: 0.010, smartmove0.9166666666666666
Wins: 1134, Losses: 3818, Draws: 549
Episode: 5600, Win Rate: 0.20, Epsilon: 0.010, smartmove0.9333333333333333
Wins: 1146, Losses: 3882, Draws: 573
Episode: 5700, Win Rate: 0.20, Epsilon: 0.010, smartmove0.95
Wins: 1151, Losses: 3946, Draws: 604
Episode: 5800, Win Rate: 0.20, Epsilon: 0.010, smartmove0.9666666666666667
Wins: 1156, Losses: 4005, Draws: 640
Episode: 5900, Win Rate: 0.20, Epsi



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.20, Epsilon: 1.000, smartmove1
Wins: 1179, Losses: 4123, Draws: 699
Episode: 6100, Win Rate: 0.19, Epsilon: 0.901, smartmove1
Wins: 1183, Losses: 4205, Draws: 713
Episode: 6200, Win Rate: 0.19, Epsilon: 0.802, smartmove1
Wins: 1184, Losses: 4294, Draws: 723
Episode: 6300, Win Rate: 0.19, Epsilon: 0.703, smartmove1
Wins: 1185, Losses: 4383, Draws: 733
Episode: 6400, Win Rate: 0.19, Epsilon: 0.604, smartmove1
Wins: 1186, Losses: 4472, Draws: 743
Episode: 6500, Win Rate: 0.18, Epsilon: 0.505, smartmove1
Wins: 1188, Losses: 4557, Draws: 756
Episode: 6600, Win Rate: 0.18, Epsilon: 0.406, smartmove1
Wins: 1193, Losses: 4627, Draws: 781
Episode: 6700, Win Rate: 0.18, Epsilon: 0.307, smartmove1
Wins: 1197, Losses: 4700, Draws: 804
Episode: 6800, Win Rate: 0.18, Epsilon: 0.208, smartmove1
Wins: 1199, Losses: 4781, Draws: 821
Episode: 6900, Win Rate: 0.17, Epsilon: 0.109, smartmove1
Wins: 1200, Losses: 4846, Draws: 855




Episode: 7000, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1201, Losses: 4915, Draws: 885
Episode: 7100, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1212, Losses: 4955, Draws: 934
Episode: 7200, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1221, Losses: 5007, Draws: 973
Episode: 7300, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1228, Losses: 5056, Draws: 1017
Episode: 7400, Win Rate: 0.17, Epsilon: 0.010, smartmove1
Wins: 1230, Losses: 5114, Draws: 1057
Episode: 7500, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1236, Losses: 5155, Draws: 1110
Episode: 7600, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1240, Losses: 5201, Draws: 1160
Episode: 7700, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1241, Losses: 5246, Draws: 1214
Episode: 7800, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1245, Losses: 5288, Draws: 1268
Episode: 7900, Win Rate: 0.16, Epsilon: 0.010, smartmove1
Wins: 1251, Losses: 5342, Draws: 1308




Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.16, Epsilon: 1.000, smartmove1
Wins: 1254, Losses: 5391, Draws: 1356
Episode: 8100, Win Rate: 0.16, Epsilon: 0.901, smartmove1
Wins: 1257, Losses: 5479, Draws: 1365
Episode: 8200, Win Rate: 0.15, Epsilon: 0.802, smartmove1
Wins: 1258, Losses: 5569, Draws: 1374
Episode: 8300, Win Rate: 0.15, Epsilon: 0.703, smartmove1
Wins: 1262, Losses: 5653, Draws: 1386
Episode: 8400, Win Rate: 0.15, Epsilon: 0.604, smartmove1
Wins: 1264, Losses: 5734, Draws: 1403
Episode: 8500, Win Rate: 0.15, Epsilon: 0.505, smartmove1
Wins: 1265, Losses: 5809, Draws: 1427
Episode: 8600, Win Rate: 0.15, Epsilon: 0.406, smartmove1
Wins: 1268, Losses: 5885, Draws: 1448
Episode: 8700, Win Rate: 0.15, Epsilon: 0.307, smartmove1
Wins: 1268, Losses: 5964, Draws: 1469
Episode: 8800, Win Rate: 0.14, Epsilon: 0.208, smartmove1
Wins: 1272, Losses: 6025, Draws: 1504
Episode: 8900, Win Rate: 0.14, Epsilon: 0.109, smartmove1
Wins: 1280, Losses: 6066, Draws: 1555




Episode: 9000, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1281, Losses: 6109, Draws: 1611
Episode: 9100, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1283, Losses: 6142, Draws: 1676
Episode: 9200, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1288, Losses: 6173, Draws: 1740
Episode: 9300, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1292, Losses: 6208, Draws: 1801
Episode: 9400, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1292, Losses: 6248, Draws: 1861
Episode: 9500, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1295, Losses: 6288, Draws: 1918
Episode: 9600, Win Rate: 0.14, Epsilon: 0.010, smartmove1
Wins: 1298, Losses: 6317, Draws: 1986
Episode: 9700, Win Rate: 0.13, Epsilon: 0.010, smartmove1
Wins: 1299, Losses: 6349, Draws: 2053
Episode: 9800, Win Rate: 0.13, Epsilon: 0.010, smartmove1
Wins: 1304, Losses: 6383, Draws: 2114
Episode: 9900, Win Rate: 0.13, Epsilon: 0.010, smartmove1
Wins: 1305, Losses: 6427, Draws: 2169




In [None]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 1000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1,-0.5*smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model6_episode_{episode}.h5')
    agent.model.save('model6.h5')
agent = train_agent()





Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove0.0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.30, Epsilon: 0.901, smartmove0.016666666666666666
Wins: 30, Losses: 63, Draws: 8
Episode: 200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.03333333333333333
Wins: 57, Losses: 127, Draws: 17
Episode: 300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.05
Wins: 94, Losses: 173, Draws: 34
Episode: 400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.06666666666666667
Wins: 123, Losses: 239, Draws: 39
Episode: 500, Win Rate: 0.32, Epsilon: 0.505, smartmove0.08333333333333333
Wins: 158, Losses: 291, Draws: 52
Episode: 600, Win Rate: 0.32, Epsilon: 0.406, smartmove0.1
Wins: 190, Losses: 345, Draws: 66
Episode: 700, Win Rate: 0.32, Epsilon: 0.307, smartmove0.11666666666666667
Wins: 221, Losses: 405, Draws: 75
Episode: 800, Win Rate: 0.32, Epsilon: 0.208, smartmove0.13333333333333333
Wins: 257, Losses: 461, Draws: 83
Episode: 900, Win Rate: 0.32, Epsilon: 0.109, smartmove0.15
Wins: 285, Losses



Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.32, Epsilon: 1.000, smartmove0.16666666666666666
Wins: 318, Losses: 575, Draws: 108
Episode: 1100, Win Rate: 0.31, Epsilon: 0.901, smartmove0.18333333333333332
Wins: 341, Losses: 639, Draws: 121
Episode: 1200, Win Rate: 0.30, Epsilon: 0.802, smartmove0.2
Wins: 361, Losses: 704, Draws: 136
Episode: 1300, Win Rate: 0.30, Epsilon: 0.703, smartmove0.21666666666666667
Wins: 385, Losses: 769, Draws: 147
Episode: 1400, Win Rate: 0.29, Epsilon: 0.604, smartmove0.23333333333333334
Wins: 412, Losses: 830, Draws: 159
Episode: 1500, Win Rate: 0.30, Epsilon: 0.505, smartmove0.25
Wins: 447, Losses: 884, Draws: 170
Episode: 1600, Win Rate: 0.30, Epsilon: 0.406, smartmove0.26666666666666666
Wins: 473, Losses: 947, Draws: 181
Episode: 1700, Win Rate: 0.30, Epsilon: 0.307, smartmove0.2833333333333333
Wins: 502, Losses: 1011, Draws: 188
Episode: 1800, Win Rate: 0.30, Epsilon: 0.208, smartmove0.3
Wins: 534, Losses: 1074, Draws: 193
Episode: 1900, Win Rat



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.29, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 582, Losses: 1209, Draws: 210
Episode: 2100, Win Rate: 0.29, Epsilon: 0.901, smartmove0.35
Wins: 601, Losses: 1277, Draws: 223
Episode: 2200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.36666666666666664
Wins: 624, Losses: 1343, Draws: 234
Episode: 2300, Win Rate: 0.28, Epsilon: 0.703, smartmove0.38333333333333336
Wins: 644, Losses: 1411, Draws: 246
Episode: 2400, Win Rate: 0.28, Epsilon: 0.604, smartmove0.4
Wins: 671, Losses: 1472, Draws: 258
Episode: 2500, Win Rate: 0.28, Epsilon: 0.505, smartmove0.4166666666666667
Wins: 693, Losses: 1547, Draws: 261
Episode: 2600, Win Rate: 0.28, Epsilon: 0.406, smartmove0.43333333333333335
Wins: 718, Losses: 1613, Draws: 270
Episode: 2700, Win Rate: 0.28, Epsilon: 0.307, smartmove0.45
Wins: 748, Losses: 1666, Draws: 287
Episode: 2800, Win Rate: 0.27, Epsilon: 0.208, smartmove0.4666666666666667
Wins: 766, Losses: 1736, Draws: 299
Episode: 2900, W



Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.27, Epsilon: 1.000, smartmove0.5
Wins: 803, Losses: 1884, Draws: 314
Episode: 3100, Win Rate: 0.26, Epsilon: 0.901, smartmove0.5166666666666667
Wins: 819, Losses: 1959, Draws: 323
Episode: 3200, Win Rate: 0.26, Epsilon: 0.802, smartmove0.5333333333333333
Wins: 837, Losses: 2030, Draws: 334
Episode: 3300, Win Rate: 0.26, Epsilon: 0.703, smartmove0.55
Wins: 859, Losses: 2096, Draws: 346
Episode: 3400, Win Rate: 0.26, Epsilon: 0.604, smartmove0.5666666666666667
Wins: 875, Losses: 2175, Draws: 351
Episode: 3500, Win Rate: 0.25, Epsilon: 0.505, smartmove0.5833333333333334
Wins: 891, Losses: 2249, Draws: 361
Episode: 3600, Win Rate: 0.25, Epsilon: 0.406, smartmove0.6
Wins: 908, Losses: 2328, Draws: 365
Episode: 3700, Win Rate: 0.25, Epsilon: 0.307, smartmove0.6166666666666667
Wins: 920, Losses: 2399, Draws: 382
Episode: 3800, Win Rate: 0.25, Epsilon: 0.208, smartmove0.6333333333333333
Wins: 935, Losses: 2466, Draws: 400
Episode: 3900, Win R



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.24, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 974, Losses: 2607, Draws: 420
Episode: 4100, Win Rate: 0.24, Epsilon: 0.901, smartmove0.6833333333333333
Wins: 986, Losses: 2689, Draws: 426
Episode: 4200, Win Rate: 0.24, Epsilon: 0.802, smartmove0.7
Wins: 994, Losses: 2770, Draws: 437
Episode: 4300, Win Rate: 0.23, Epsilon: 0.703, smartmove0.7166666666666667
Wins: 1004, Losses: 2846, Draws: 451
Episode: 4400, Win Rate: 0.23, Epsilon: 0.604, smartmove0.7333333333333333
Wins: 1011, Losses: 2931, Draws: 459
Episode: 4500, Win Rate: 0.23, Epsilon: 0.505, smartmove0.75
Wins: 1025, Losses: 3011, Draws: 465
Episode: 4600, Win Rate: 0.23, Epsilon: 0.406, smartmove0.7666666666666667
Wins: 1036, Losses: 3086, Draws: 479
Episode: 4700, Win Rate: 0.22, Epsilon: 0.307, smartmove0.7833333333333333
Wins: 1049, Losses: 3165, Draws: 487
Episode: 4800, Win Rate: 0.22, Epsilon: 0.208, smartmove0.8
Wins: 1058, Losses: 3242, Draws: 501
Episode: 4900,



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.21, Epsilon: 1.000, smartmove0.8333333333333334
Wins: 1073, Losses: 3383, Draws: 545
Episode: 5100, Win Rate: 0.21, Epsilon: 0.901, smartmove0.85
Wins: 1078, Losses: 3468, Draws: 555
Episode: 5200, Win Rate: 0.21, Epsilon: 0.802, smartmove0.8666666666666667
Wins: 1084, Losses: 3553, Draws: 564
Episode: 5300, Win Rate: 0.20, Epsilon: 0.703, smartmove0.8833333333333333
Wins: 1084, Losses: 3640, Draws: 577
Episode: 5400, Win Rate: 0.20, Epsilon: 0.604, smartmove0.9
Wins: 1093, Losses: 3716, Draws: 592
Episode: 5500, Win Rate: 0.20, Epsilon: 0.505, smartmove0.9166666666666666
Wins: 1099, Losses: 3800, Draws: 602
Episode: 5600, Win Rate: 0.20, Epsilon: 0.406, smartmove0.9333333333333333
Wins: 1103, Losses: 3882, Draws: 616
Episode: 5700, Win Rate: 0.19, Epsilon: 0.307, smartmove0.95
Wins: 1108, Losses: 3960, Draws: 633
Episode: 5800, Win Rate: 0.19, Epsilon: 0.208, smartmove0.9666666666666667
Wins: 1112, Losses: 4042, Draws: 647
Episode: 5



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.19, Epsilon: 1.000, smartmove1
Wins: 1116, Losses: 4194, Draws: 691
Episode: 6100, Win Rate: 0.18, Epsilon: 0.901, smartmove1
Wins: 1118, Losses: 4284, Draws: 699
Episode: 6200, Win Rate: 0.18, Epsilon: 0.802, smartmove1
Wins: 1120, Losses: 4373, Draws: 708
Episode: 6300, Win Rate: 0.18, Epsilon: 0.703, smartmove1
Wins: 1120, Losses: 4459, Draws: 722
Episode: 6400, Win Rate: 0.18, Epsilon: 0.604, smartmove1
Wins: 1121, Losses: 4539, Draws: 741
Episode: 6500, Win Rate: 0.17, Epsilon: 0.505, smartmove1
Wins: 1122, Losses: 4623, Draws: 756
Episode: 6600, Win Rate: 0.17, Epsilon: 0.406, smartmove1
Wins: 1125, Losses: 4710, Draws: 766
Episode: 6700, Win Rate: 0.17, Epsilon: 0.307, smartmove1
Wins: 1128, Losses: 4783, Draws: 790
Episode: 6800, Win Rate: 0.17, Epsilon: 0.208, smartmove1
Wins: 1130, Losses: 4858, Draws: 813
Episode: 6900, Win Rate: 0.16, Epsilon: 0.109, smartmove1
Wins: 1133, Losses: 4923, Draws: 845




Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.16, Epsilon: 1.000, smartmove1
Wins: 1138, Losses: 4988, Draws: 875
Episode: 7100, Win Rate: 0.16, Epsilon: 0.901, smartmove1
Wins: 1139, Losses: 5079, Draws: 883
Episode: 7200, Win Rate: 0.16, Epsilon: 0.802, smartmove1
Wins: 1143, Losses: 5165, Draws: 893
Episode: 7300, Win Rate: 0.16, Epsilon: 0.703, smartmove1
Wins: 1145, Losses: 5254, Draws: 902
Episode: 7400, Win Rate: 0.15, Epsilon: 0.604, smartmove1
Wins: 1146, Losses: 5337, Draws: 918
Episode: 7500, Win Rate: 0.15, Epsilon: 0.505, smartmove1
Wins: 1149, Losses: 5416, Draws: 936
Episode: 7600, Win Rate: 0.15, Epsilon: 0.406, smartmove1
Wins: 1149, Losses: 5500, Draws: 952
Episode: 7700, Win Rate: 0.15, Epsilon: 0.307, smartmove1
Wins: 1155, Losses: 5557, Draws: 989
Episode: 7800, Win Rate: 0.15, Epsilon: 0.208, smartmove1
Wins: 1158, Losses: 5618, Draws: 1025
Episode: 7900, Win Rate: 0.15, Epsilon: 0.109, smartmove1
Wins: 1165, Losses: 5672, Draws: 1064




Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.15, Epsilon: 1.000, smartmove1
Wins: 1172, Losses: 5729, Draws: 1100
Episode: 8100, Win Rate: 0.15, Epsilon: 0.901, smartmove1
Wins: 1175, Losses: 5812, Draws: 1114
Episode: 8200, Win Rate: 0.14, Epsilon: 0.802, smartmove1
Wins: 1177, Losses: 5896, Draws: 1128
Episode: 8300, Win Rate: 0.14, Epsilon: 0.703, smartmove1
Wins: 1180, Losses: 5981, Draws: 1140
Episode: 8400, Win Rate: 0.14, Epsilon: 0.604, smartmove1
Wins: 1184, Losses: 6057, Draws: 1160
Episode: 8500, Win Rate: 0.14, Epsilon: 0.505, smartmove1
Wins: 1188, Losses: 6130, Draws: 1183
Episode: 8600, Win Rate: 0.14, Epsilon: 0.406, smartmove1
Wins: 1190, Losses: 6202, Draws: 1209
Episode: 8700, Win Rate: 0.14, Epsilon: 0.307, smartmove1
Wins: 1194, Losses: 6253, Draws: 1254
Episode: 8800, Win Rate: 0.14, Epsilon: 0.208, smartmove1
Wins: 1200, Losses: 6304, Draws: 1297
Episode: 8900, Win Rate: 0.14, Epsilon: 0.109, smartmove1
Wins: 1207, Losses: 6354, Draws: 1340




Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.13, Epsilon: 1.000, smartmove1
Wins: 1211, Losses: 6403, Draws: 1387
Episode: 9100, Win Rate: 0.13, Epsilon: 0.901, smartmove1
Wins: 1211, Losses: 6491, Draws: 1399
Episode: 9200, Win Rate: 0.13, Epsilon: 0.802, smartmove1
Wins: 1213, Losses: 6578, Draws: 1410
Episode: 9300, Win Rate: 0.13, Epsilon: 0.703, smartmove1
Wins: 1216, Losses: 6663, Draws: 1422
Episode: 9400, Win Rate: 0.13, Epsilon: 0.604, smartmove1
Wins: 1217, Losses: 6744, Draws: 1440
Episode: 9500, Win Rate: 0.13, Epsilon: 0.505, smartmove1
Wins: 1219, Losses: 6821, Draws: 1461
Episode: 9600, Win Rate: 0.13, Epsilon: 0.406, smartmove1
Wins: 1224, Losses: 6885, Draws: 1492
Episode: 9700, Win Rate: 0.13, Epsilon: 0.307, smartmove1
Wins: 1229, Losses: 6948, Draws: 1524
Episode: 9800, Win Rate: 0.13, Epsilon: 0.208, smartmove1
Wins: 1233, Losses: 7004, Draws: 1564
Episode: 9900, Win Rate: 0.13, Epsilon: 0.109, smartmove1
Wins: 1239, Losses: 7056, Draws: 1606




In [2]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = -max(1,0.5*smartness)*8/10
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model7_episode_{episode}.h5')
    agent.model.save('model7.h5')
agent = train_agent()



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 0, Win Rate: 1.00, Epsilon: 1.000, smartmove0.0
Wins: 1, Losses: 0, Draws: 0
Episode: 100, Win Rate: 0.27, Epsilon: 0.901, smartmove0.01
Wins: 27, Losses: 58, Draws: 16
Episode: 200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.02
Wins: 54, Losses: 116, Draws: 31
Episode: 300, Win Rate: 0.28, Epsilon: 0.703, smartmove0.03
Wins: 85, Losses: 173, Draws: 43
Episode: 400, Win Rate: 0.27, Epsilon: 0.604, smartmove0.04
Wins: 108, Losses: 239, Draws: 54
Episode: 500, Win Rate: 0.28, Epsilon: 0.505, smartmove0.05
Wins: 141, Losses: 299, Draws: 61
Episode: 600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.06
Wins: 174, Losses: 356, Draws: 71
Episode: 700, Win Rate: 0.29, Epsilon: 0.307, smartmove0.07
Wins: 203, Losses: 420, Draws: 78
Episode: 800, Win Rate: 0.29, Epsilon: 0.208, smartmove0.08
Wins: 230, Losses: 486, Draws: 85
Episode: 900, Win Rate: 0.29, Epsilon: 0.109, smartmove0.09
Wins: 264, Losses: 548, Draws: 89




Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.29, Epsilon: 1.000, smartmove0.1
Wins: 289, Losses: 617, Draws: 95
Episode: 1100, Win Rate: 0.29, Epsilon: 0.010, smartmove0.11
Wins: 318, Losses: 684, Draws: 99
Episode: 1200, Win Rate: 0.29, Epsilon: 0.010, smartmove0.12
Wins: 351, Losses: 743, Draws: 107
Episode: 1300, Win Rate: 0.30, Epsilon: 0.010, smartmove0.13
Wins: 390, Losses: 803, Draws: 108
Episode: 1400, Win Rate: 0.30, Epsilon: 0.010, smartmove0.14
Wins: 426, Losses: 863, Draws: 112
Episode: 1500, Win Rate: 0.30, Epsilon: 0.010, smartmove0.15
Wins: 455, Losses: 931, Draws: 115
Episode: 1600, Win Rate: 0.32, Epsilon: 0.010, smartmove0.16
Wins: 507, Losses: 977, Draws: 117
Episode: 1700, Win Rate: 0.32, Epsilon: 0.010, smartmove0.17
Wins: 547, Losses: 1035, Draws: 119
Episode: 1800, Win Rate: 0.32, Epsilon: 0.010, smartmove0.18
Wins: 570, Losses: 1106, Draws: 125
Episode: 1900, Win Rate: 0.32, Epsilon: 0.010, smartmove0.19
Wins: 604, Losses: 1171, Draws: 126




Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.32, Epsilon: 1.000, smartmove0.2
Wins: 644, Losses: 1227, Draws: 130
Episode: 2100, Win Rate: 0.32, Epsilon: 0.901, smartmove0.21
Wins: 669, Losses: 1294, Draws: 138
Episode: 2200, Win Rate: 0.31, Epsilon: 0.802, smartmove0.22
Wins: 693, Losses: 1360, Draws: 148
Episode: 2300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.23
Wins: 721, Losses: 1424, Draws: 156
Episode: 2400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.24
Wins: 750, Losses: 1488, Draws: 163
Episode: 2500, Win Rate: 0.31, Epsilon: 0.505, smartmove0.25
Wins: 786, Losses: 1537, Draws: 178
Episode: 2600, Win Rate: 0.31, Epsilon: 0.406, smartmove0.26
Wins: 809, Losses: 1605, Draws: 187
Episode: 2700, Win Rate: 0.31, Epsilon: 0.307, smartmove0.27
Wins: 841, Losses: 1664, Draws: 196
Episode: 2800, Win Rate: 0.31, Epsilon: 0.208, smartmove0.28
Wins: 867, Losses: 1731, Draws: 203
Episode: 2900, Win Rate: 0.31, Epsilon: 0.109, smartmove0.29
Wins: 896, Losses: 1796, Draws: 209




Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.31, Epsilon: 1.000, smartmove0.3
Wins: 926, Losses: 1863, Draws: 212
Episode: 3100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.31
Wins: 945, Losses: 1936, Draws: 220
Episode: 3200, Win Rate: 0.30, Epsilon: 0.010, smartmove0.32
Wins: 973, Losses: 1998, Draws: 230
Episode: 3300, Win Rate: 0.30, Epsilon: 0.010, smartmove0.33
Wins: 1004, Losses: 2060, Draws: 237
Episode: 3400, Win Rate: 0.30, Epsilon: 0.010, smartmove0.34
Wins: 1032, Losses: 2120, Draws: 249
Episode: 3500, Win Rate: 0.30, Epsilon: 0.010, smartmove0.35
Wins: 1057, Losses: 2188, Draws: 256
Episode: 3600, Win Rate: 0.30, Epsilon: 0.010, smartmove0.36
Wins: 1086, Losses: 2247, Draws: 268
Episode: 3700, Win Rate: 0.30, Epsilon: 0.010, smartmove0.37
Wins: 1107, Losses: 2320, Draws: 274
Episode: 3800, Win Rate: 0.30, Epsilon: 0.010, smartmove0.38
Wins: 1132, Losses: 2387, Draws: 282
Episode: 3900, Win Rate: 0.30, Epsilon: 0.010, smartmove0.39
Wins: 1166, Losses: 2445, Draws: 290



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.30, Epsilon: 1.000, smartmove0.4
Wins: 1186, Losses: 2513, Draws: 302
Episode: 4100, Win Rate: 0.29, Epsilon: 0.901, smartmove0.41
Wins: 1206, Losses: 2586, Draws: 309
Episode: 4200, Win Rate: 0.29, Epsilon: 0.802, smartmove0.42
Wins: 1221, Losses: 2663, Draws: 317
Episode: 4300, Win Rate: 0.29, Epsilon: 0.703, smartmove0.43
Wins: 1242, Losses: 2733, Draws: 326
Episode: 4400, Win Rate: 0.29, Epsilon: 0.604, smartmove0.44
Wins: 1262, Losses: 2807, Draws: 332
Episode: 4500, Win Rate: 0.29, Epsilon: 0.505, smartmove0.45
Wins: 1284, Losses: 2875, Draws: 342
Episode: 4600, Win Rate: 0.28, Epsilon: 0.406, smartmove0.46
Wins: 1301, Losses: 2947, Draws: 353
Episode: 4700, Win Rate: 0.28, Epsilon: 0.307, smartmove0.47
Wins: 1321, Losses: 3013, Draws: 367
Episode: 4800, Win Rate: 0.28, Epsilon: 0.208, smartmove0.48
Wins: 1339, Losses: 3085, Draws: 377
Episode: 4900, Win Rate: 0.28, Epsilon: 0.109, smartmove0.49
Wins: 1361, Losses: 3152, Draws: 



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.5
Wins: 1383, Losses: 3215, Draws: 403
Episode: 5100, Win Rate: 0.28, Epsilon: 0.010, smartmove0.51
Wins: 1410, Losses: 3278, Draws: 413
Episode: 5200, Win Rate: 0.28, Epsilon: 0.010, smartmove0.52
Wins: 1439, Losses: 3338, Draws: 424
Episode: 5300, Win Rate: 0.28, Epsilon: 0.010, smartmove0.53
Wins: 1463, Losses: 3400, Draws: 438
Episode: 5400, Win Rate: 0.27, Epsilon: 0.010, smartmove0.54
Wins: 1479, Losses: 3465, Draws: 457
Episode: 5500, Win Rate: 0.28, Epsilon: 0.010, smartmove0.55
Wins: 1514, Losses: 3522, Draws: 465
Episode: 5600, Win Rate: 0.28, Epsilon: 0.010, smartmove0.56
Wins: 1544, Losses: 3581, Draws: 476
Episode: 5700, Win Rate: 0.28, Epsilon: 0.010, smartmove0.57
Wins: 1571, Losses: 3642, Draws: 488
Episode: 5800, Win Rate: 0.28, Epsilon: 0.010, smartmove0.58
Wins: 1599, Losses: 3699, Draws: 503
Episode: 5900, Win Rate: 0.27, Epsilon: 0.010, smartmove0.59
Wins: 1620, Losses: 3761, Draws: 



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.27, Epsilon: 1.000, smartmove0.6
Wins: 1643, Losses: 3825, Draws: 533
Episode: 6100, Win Rate: 0.27, Epsilon: 0.901, smartmove0.61
Wins: 1651, Losses: 3904, Draws: 546
Episode: 6200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.62
Wins: 1670, Losses: 3973, Draws: 558
Episode: 6300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.63
Wins: 1675, Losses: 4058, Draws: 568
Episode: 6400, Win Rate: 0.26, Epsilon: 0.604, smartmove0.64
Wins: 1686, Losses: 4137, Draws: 578
Episode: 6500, Win Rate: 0.26, Epsilon: 0.505, smartmove0.65
Wins: 1701, Losses: 4208, Draws: 592
Episode: 6600, Win Rate: 0.26, Epsilon: 0.406, smartmove0.66
Wins: 1720, Losses: 4276, Draws: 605
Episode: 6700, Win Rate: 0.26, Epsilon: 0.307, smartmove0.67
Wins: 1736, Losses: 4349, Draws: 616
Episode: 6800, Win Rate: 0.26, Epsilon: 0.208, smartmove0.68
Wins: 1752, Losses: 4416, Draws: 633
Episode: 6900, Win Rate: 0.26, Epsilon: 0.109, smartmove0.69
Wins: 1777, Losses: 4475, Draws: 



Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.26, Epsilon: 1.000, smartmove0.7
Wins: 1797, Losses: 4538, Draws: 666
Episode: 7100, Win Rate: 0.25, Epsilon: 0.010, smartmove0.71
Wins: 1806, Losses: 4609, Draws: 686
Episode: 7200, Win Rate: 0.25, Epsilon: 0.010, smartmove0.72
Wins: 1827, Losses: 4672, Draws: 702
Episode: 7300, Win Rate: 0.25, Epsilon: 0.010, smartmove0.73
Wins: 1844, Losses: 4736, Draws: 721
Episode: 7400, Win Rate: 0.25, Epsilon: 0.010, smartmove0.74
Wins: 1866, Losses: 4794, Draws: 741
Episode: 7500, Win Rate: 0.25, Epsilon: 0.010, smartmove0.75
Wins: 1892, Losses: 4848, Draws: 761
Episode: 7600, Win Rate: 0.25, Epsilon: 0.010, smartmove0.76
Wins: 1910, Losses: 4901, Draws: 790
Episode: 7700, Win Rate: 0.25, Epsilon: 0.010, smartmove0.77
Wins: 1931, Losses: 4956, Draws: 814
Episode: 7800, Win Rate: 0.25, Epsilon: 0.010, smartmove0.78
Wins: 1946, Losses: 5018, Draws: 837
Episode: 7900, Win Rate: 0.25, Epsilon: 0.010, smartmove0.79
Wins: 1961, Losses: 5081, Draws: 



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.25, Epsilon: 1.000, smartmove0.8
Wins: 1977, Losses: 5140, Draws: 884
Episode: 8100, Win Rate: 0.24, Epsilon: 0.901, smartmove0.81
Wins: 1981, Losses: 5223, Draws: 897
Episode: 8200, Win Rate: 0.24, Epsilon: 0.802, smartmove0.82
Wins: 1989, Losses: 5300, Draws: 912
Episode: 8300, Win Rate: 0.24, Epsilon: 0.703, smartmove0.83
Wins: 1997, Losses: 5381, Draws: 923
Episode: 8400, Win Rate: 0.24, Epsilon: 0.604, smartmove0.84
Wins: 2005, Losses: 5464, Draws: 932
Episode: 8500, Win Rate: 0.24, Epsilon: 0.505, smartmove0.85
Wins: 2016, Losses: 5535, Draws: 950
Episode: 8600, Win Rate: 0.24, Epsilon: 0.406, smartmove0.86
Wins: 2026, Losses: 5609, Draws: 966
Episode: 8700, Win Rate: 0.23, Epsilon: 0.307, smartmove0.87
Wins: 2035, Losses: 5677, Draws: 989
Episode: 8800, Win Rate: 0.23, Epsilon: 0.208, smartmove0.88
Wins: 2042, Losses: 5756, Draws: 1003
Episode: 8900, Win Rate: 0.23, Epsilon: 0.109, smartmove0.89
Wins: 2054, Losses: 5817, Draws:



Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.23, Epsilon: 1.000, smartmove0.9
Wins: 2061, Losses: 5872, Draws: 1068
Episode: 9100, Win Rate: 0.23, Epsilon: 0.010, smartmove0.91
Wins: 2073, Losses: 5926, Draws: 1102
Episode: 9200, Win Rate: 0.23, Epsilon: 0.010, smartmove0.92
Wins: 2085, Losses: 5981, Draws: 1135
Episode: 9300, Win Rate: 0.22, Epsilon: 0.010, smartmove0.93
Wins: 2089, Losses: 6040, Draws: 1172
Episode: 9400, Win Rate: 0.22, Epsilon: 0.010, smartmove0.94
Wins: 2099, Losses: 6104, Draws: 1198
Episode: 9500, Win Rate: 0.22, Epsilon: 0.010, smartmove0.95
Wins: 2103, Losses: 6170, Draws: 1228
Episode: 9600, Win Rate: 0.22, Epsilon: 0.010, smartmove0.96
Wins: 2106, Losses: 6234, Draws: 1261
Episode: 9700, Win Rate: 0.22, Epsilon: 0.010, smartmove0.97
Wins: 2115, Losses: 6288, Draws: 1298
Episode: 9800, Win Rate: 0.22, Epsilon: 0.010, smartmove0.98
Wins: 2127, Losses: 6338, Draws: 1336
Episode: 9900, Win Rate: 0.22, Epsilon: 0.010, smartmove0.99
Wins: 2131, Losses: 6385



In [3]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization, Dropout
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.99):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.0005  # Reduced learning rate for more stable learning
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.85
        self.batch_size = 64  # Increased batch size
        self.replay_buffer = deque(maxlen=50000)  # Increased buffer size
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_counter = 0
        self.update_target_frequency = 1000

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        
        # Prioritize winning moves and block opponent wins
        temp_board = np.array(state).copy()
        for action in valid_actions:
            # Check for winning move
            temp_board[action] = 2
            if self._check_win(temp_board, 2):
                return action
            temp_board[action] = 0
            
            # Check for blocking opponent win
            temp_board[action] = 1
            if self._check_win(temp_board, 1):
                return action
            temp_board[action] = 0

        # If no immediate winning/blocking moves, use Q-values
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def _check_win(self, board, player):
        win_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Columns
            [0, 4, 8], [2, 4, 6]  # Diagonals
        ]
        return any(all(board[i] == player for i in combo) for combo in win_combinations)

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Use target network for more stable training
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.target_model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    # Double DQN: Select action using online network, evaluate using target network
                    next_q = self.model.predict(next_states[i:i+1], verbose=0)[0]
                    best_action = max([(next_q[a], a) for a in next_valid_actions], key=lambda x: x[0])[1]
                    target = reward + self.gamma * next_q_values[i][best_action]
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)
        
        # Update target network periodically
        self.update_target_counter += 1
        if self.update_target_counter >= self.update_target_frequency:
            self.target_model.set_weights(self.model.get_weights())
            self.update_target_counter = 0

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    best_win_rate = 0
    no_improvement_counter = 0
    smartness = 0
    for episode in range(episodes):
        # Decrease epsilon every 100 iterations
        if episode % 100 == 0 and episode%1000!=0:
            agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)

        # Increase smartness and partially reset epsilon every 2000 iterations
        if episode % 1000 == 0 and episode > 0:
            smartness = min(0.8, smartness + 0.1)
            agent.epsilon = 1  # Partial epsilon reset
            
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1 + (0.1 * smartness)  # Higher reward for winning against smarter opponent
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            if not game.is_full():
                game.player1_move()
                
            if game.current_winner == 1:
                reward = -1
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = -0.1 * smartness  # Small negative reward for draws against smart opponent
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        # Evaluation and model saving logic
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, Smartness: {smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
            
            if win_rate > best_win_rate:
                best_win_rate = win_rate
                no_improvement_counter = 0
            else:
                no_improvement_counter += 1
            
            if episode % 1000 == 0:
                agent.model.save(f'model9_episode_{episode}.h5')
        
    agent.model.save('model9.h5')
    return agent

agent = train_agent()




Episode: 0, Win Rate: 0.00, Epsilon: 1.000, Smartness: 0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.24, Epsilon: 0.850, Smartness: 0
Wins: 24, Losses: 60, Draws: 17
Episode: 200, Win Rate: 0.26, Epsilon: 0.722, Smartness: 0
Wins: 52, Losses: 120, Draws: 29
Episode: 300, Win Rate: 0.28, Epsilon: 0.614, Smartness: 0
Wins: 85, Losses: 169, Draws: 47
Episode: 400, Win Rate: 0.30, Epsilon: 0.522, Smartness: 0
Wins: 122, Losses: 210, Draws: 69
Episode: 500, Win Rate: 0.33, Epsilon: 0.444, Smartness: 0
Wins: 164, Losses: 240, Draws: 97
Episode: 600, Win Rate: 0.33, Epsilon: 0.377, Smartness: 0
Wins: 198, Losses: 277, Draws: 126
Episode: 700, Win Rate: 0.35, Epsilon: 0.321, Smartness: 0
Wins: 247, Losses: 301, Draws: 153
Episode: 800, Win Rate: 0.36, Epsilon: 0.272, Smartness: 0
Wins: 291, Losses: 332, Draws: 178
Episode: 900, Win Rate: 0.37, Epsilon: 0.232, Smartness: 0
Wins: 335, Losses: 356, Draws: 210




Episode: 1000, Win Rate: 0.39, Epsilon: 1.000, Smartness: 0.1
Wins: 391, Losses: 382, Draws: 228
Episode: 1100, Win Rate: 0.38, Epsilon: 0.850, Smartness: 0.1
Wins: 420, Losses: 441, Draws: 240
Episode: 1200, Win Rate: 0.37, Epsilon: 0.722, Smartness: 0.1
Wins: 449, Losses: 498, Draws: 254
Episode: 1300, Win Rate: 0.38, Epsilon: 0.614, Smartness: 0.1
Wins: 488, Losses: 547, Draws: 266
Episode: 1400, Win Rate: 0.37, Epsilon: 0.522, Smartness: 0.1
Wins: 525, Losses: 591, Draws: 285
Episode: 1500, Win Rate: 0.37, Epsilon: 0.444, Smartness: 0.1
Wins: 560, Losses: 631, Draws: 310
Episode: 1600, Win Rate: 0.38, Epsilon: 0.377, Smartness: 0.1
Wins: 612, Losses: 657, Draws: 332
Episode: 1700, Win Rate: 0.39, Epsilon: 0.321, Smartness: 0.1
Wins: 657, Losses: 691, Draws: 353
Episode: 1800, Win Rate: 0.39, Epsilon: 0.272, Smartness: 0.1
Wins: 697, Losses: 720, Draws: 384
Episode: 1900, Win Rate: 0.39, Epsilon: 0.232, Smartness: 0.1
Wins: 748, Losses: 747, Draws: 406




Episode: 2000, Win Rate: 0.39, Epsilon: 1.000, Smartness: 0.2
Wins: 789, Losses: 775, Draws: 437
Episode: 2100, Win Rate: 0.38, Epsilon: 0.850, Smartness: 0.2
Wins: 808, Losses: 848, Draws: 445
Episode: 2200, Win Rate: 0.38, Epsilon: 0.722, Smartness: 0.2
Wins: 838, Losses: 903, Draws: 460
Episode: 2300, Win Rate: 0.38, Epsilon: 0.614, Smartness: 0.2
Wins: 868, Losses: 954, Draws: 479
Episode: 2400, Win Rate: 0.37, Epsilon: 0.522, Smartness: 0.2
Wins: 897, Losses: 1010, Draws: 494
Episode: 2500, Win Rate: 0.37, Epsilon: 0.444, Smartness: 0.2
Wins: 928, Losses: 1056, Draws: 517
Episode: 2600, Win Rate: 0.37, Epsilon: 0.377, Smartness: 0.2
Wins: 961, Losses: 1103, Draws: 537
Episode: 2700, Win Rate: 0.37, Epsilon: 0.321, Smartness: 0.2
Wins: 1010, Losses: 1135, Draws: 556
Episode: 2800, Win Rate: 0.38, Epsilon: 0.272, Smartness: 0.2
Wins: 1059, Losses: 1167, Draws: 575
Episode: 2900, Win Rate: 0.38, Epsilon: 0.232, Smartness: 0.2
Wins: 1106, Losses: 1193, Draws: 602




Episode: 3000, Win Rate: 0.38, Epsilon: 1.000, Smartness: 0.30000000000000004
Wins: 1150, Losses: 1226, Draws: 625
Episode: 3100, Win Rate: 0.38, Epsilon: 0.850, Smartness: 0.30000000000000004
Wins: 1174, Losses: 1294, Draws: 633
Episode: 3200, Win Rate: 0.37, Epsilon: 0.722, Smartness: 0.30000000000000004
Wins: 1189, Losses: 1362, Draws: 650
Episode: 3300, Win Rate: 0.37, Epsilon: 0.614, Smartness: 0.30000000000000004
Wins: 1220, Losses: 1416, Draws: 665
Episode: 3400, Win Rate: 0.37, Epsilon: 0.522, Smartness: 0.30000000000000004
Wins: 1250, Losses: 1471, Draws: 680
Episode: 3500, Win Rate: 0.37, Epsilon: 0.444, Smartness: 0.30000000000000004
Wins: 1282, Losses: 1509, Draws: 710
Episode: 3600, Win Rate: 0.36, Epsilon: 0.377, Smartness: 0.30000000000000004
Wins: 1313, Losses: 1552, Draws: 736
Episode: 3700, Win Rate: 0.36, Epsilon: 0.321, Smartness: 0.30000000000000004
Wins: 1347, Losses: 1593, Draws: 761
Episode: 3800, Win Rate: 0.36, Epsilon: 0.272, Smartness: 0.30000000000000004
Wi



Episode: 4000, Win Rate: 0.37, Epsilon: 1.000, Smartness: 0.4
Wins: 1466, Losses: 1704, Draws: 831
Episode: 4100, Win Rate: 0.36, Epsilon: 0.850, Smartness: 0.4
Wins: 1479, Losses: 1776, Draws: 846
Episode: 4200, Win Rate: 0.36, Epsilon: 0.722, Smartness: 0.4
Wins: 1497, Losses: 1846, Draws: 858
Episode: 4300, Win Rate: 0.35, Epsilon: 0.614, Smartness: 0.4
Wins: 1520, Losses: 1909, Draws: 872
Episode: 4400, Win Rate: 0.35, Epsilon: 0.522, Smartness: 0.4
Wins: 1546, Losses: 1966, Draws: 889
Episode: 4500, Win Rate: 0.35, Epsilon: 0.444, Smartness: 0.4
Wins: 1571, Losses: 2011, Draws: 919
Episode: 4600, Win Rate: 0.35, Epsilon: 0.377, Smartness: 0.4
Wins: 1603, Losses: 2052, Draws: 946
Episode: 4700, Win Rate: 0.35, Epsilon: 0.321, Smartness: 0.4
Wins: 1637, Losses: 2092, Draws: 972
Episode: 4800, Win Rate: 0.35, Epsilon: 0.272, Smartness: 0.4
Wins: 1675, Losses: 2130, Draws: 996
Episode: 4900, Win Rate: 0.35, Epsilon: 0.232, Smartness: 0.4
Wins: 1708, Losses: 2162, Draws: 1031




Episode: 5000, Win Rate: 0.35, Epsilon: 1.000, Smartness: 0.5
Wins: 1741, Losses: 2203, Draws: 1057
Episode: 5100, Win Rate: 0.34, Epsilon: 0.850, Smartness: 0.5
Wins: 1746, Losses: 2289, Draws: 1066
Episode: 5200, Win Rate: 0.34, Epsilon: 0.722, Smartness: 0.5
Wins: 1765, Losses: 2357, Draws: 1079
Episode: 5300, Win Rate: 0.34, Epsilon: 0.614, Smartness: 0.5
Wins: 1784, Losses: 2421, Draws: 1096
Episode: 5400, Win Rate: 0.33, Epsilon: 0.522, Smartness: 0.5
Wins: 1807, Losses: 2476, Draws: 1118
Episode: 5500, Win Rate: 0.33, Epsilon: 0.444, Smartness: 0.5
Wins: 1831, Losses: 2527, Draws: 1143
Episode: 5600, Win Rate: 0.33, Epsilon: 0.377, Smartness: 0.5
Wins: 1865, Losses: 2574, Draws: 1162
Episode: 5700, Win Rate: 0.33, Epsilon: 0.321, Smartness: 0.5
Wins: 1895, Losses: 2611, Draws: 1195
Episode: 5800, Win Rate: 0.33, Epsilon: 0.272, Smartness: 0.5
Wins: 1924, Losses: 2652, Draws: 1225
Episode: 5900, Win Rate: 0.33, Epsilon: 0.232, Smartness: 0.5
Wins: 1950, Losses: 2695, Draws: 1256




Episode: 6000, Win Rate: 0.33, Epsilon: 1.000, Smartness: 0.6
Wins: 1985, Losses: 2729, Draws: 1287
Episode: 6100, Win Rate: 0.33, Epsilon: 0.850, Smartness: 0.6
Wins: 1999, Losses: 2799, Draws: 1303
Episode: 6200, Win Rate: 0.32, Epsilon: 0.722, Smartness: 0.6
Wins: 2012, Losses: 2872, Draws: 1317
Episode: 6300, Win Rate: 0.32, Epsilon: 0.614, Smartness: 0.6
Wins: 2029, Losses: 2939, Draws: 1333
Episode: 6400, Win Rate: 0.32, Epsilon: 0.522, Smartness: 0.6
Wins: 2044, Losses: 3007, Draws: 1350
Episode: 6500, Win Rate: 0.32, Epsilon: 0.444, Smartness: 0.6
Wins: 2069, Losses: 3063, Draws: 1369
Episode: 6600, Win Rate: 0.32, Epsilon: 0.377, Smartness: 0.6
Wins: 2092, Losses: 3113, Draws: 1396
Episode: 6700, Win Rate: 0.32, Epsilon: 0.321, Smartness: 0.6
Wins: 2112, Losses: 3165, Draws: 1424
Episode: 6800, Win Rate: 0.31, Epsilon: 0.272, Smartness: 0.6
Wins: 2135, Losses: 3216, Draws: 1450
Episode: 6900, Win Rate: 0.31, Epsilon: 0.232, Smartness: 0.6
Wins: 2159, Losses: 3259, Draws: 1483




Episode: 7000, Win Rate: 0.31, Epsilon: 1.000, Smartness: 0.7
Wins: 2190, Losses: 3298, Draws: 1513
Episode: 7100, Win Rate: 0.31, Epsilon: 0.850, Smartness: 0.7
Wins: 2198, Losses: 3381, Draws: 1522
Episode: 7200, Win Rate: 0.31, Epsilon: 0.722, Smartness: 0.7
Wins: 2209, Losses: 3455, Draws: 1537
Episode: 7300, Win Rate: 0.30, Epsilon: 0.614, Smartness: 0.7
Wins: 2218, Losses: 3531, Draws: 1552
Episode: 7400, Win Rate: 0.30, Epsilon: 0.522, Smartness: 0.7
Wins: 2229, Losses: 3596, Draws: 1576
Episode: 7500, Win Rate: 0.30, Epsilon: 0.444, Smartness: 0.7
Wins: 2249, Losses: 3648, Draws: 1604
Episode: 7600, Win Rate: 0.30, Epsilon: 0.377, Smartness: 0.7
Wins: 2268, Losses: 3704, Draws: 1629
Episode: 7700, Win Rate: 0.30, Epsilon: 0.321, Smartness: 0.7
Wins: 2281, Losses: 3763, Draws: 1657
Episode: 7800, Win Rate: 0.29, Epsilon: 0.272, Smartness: 0.7
Wins: 2301, Losses: 3804, Draws: 1696
Episode: 7900, Win Rate: 0.29, Epsilon: 0.232, Smartness: 0.7
Wins: 2322, Losses: 3835, Draws: 1744




Episode: 8000, Win Rate: 0.29, Epsilon: 1.000, Smartness: 0.7999999999999999
Wins: 2337, Losses: 3881, Draws: 1783
Episode: 8100, Win Rate: 0.29, Epsilon: 0.850, Smartness: 0.7999999999999999
Wins: 2344, Losses: 3961, Draws: 1796
Episode: 8200, Win Rate: 0.29, Epsilon: 0.722, Smartness: 0.7999999999999999
Wins: 2354, Losses: 4037, Draws: 1810
Episode: 8300, Win Rate: 0.28, Epsilon: 0.614, Smartness: 0.7999999999999999
Wins: 2363, Losses: 4102, Draws: 1836
Episode: 8400, Win Rate: 0.28, Epsilon: 0.522, Smartness: 0.7999999999999999
Wins: 2378, Losses: 4169, Draws: 1854
Episode: 8500, Win Rate: 0.28, Epsilon: 0.444, Smartness: 0.7999999999999999
Wins: 2388, Losses: 4233, Draws: 1880
Episode: 8600, Win Rate: 0.28, Epsilon: 0.377, Smartness: 0.7999999999999999
Wins: 2396, Losses: 4295, Draws: 1910
Episode: 8700, Win Rate: 0.28, Epsilon: 0.321, Smartness: 0.7999999999999999
Wins: 2408, Losses: 4352, Draws: 1941
Episode: 8800, Win Rate: 0.28, Epsilon: 0.272, Smartness: 0.7999999999999999
Win



Episode: 9000, Win Rate: 0.27, Epsilon: 1.000, Smartness: 0.8
Wins: 2438, Losses: 4497, Draws: 2066
Episode: 9100, Win Rate: 0.27, Epsilon: 0.850, Smartness: 0.8
Wins: 2442, Losses: 4585, Draws: 2074
Episode: 9200, Win Rate: 0.27, Epsilon: 0.722, Smartness: 0.8
Wins: 2448, Losses: 4660, Draws: 2093
Episode: 9300, Win Rate: 0.26, Epsilon: 0.614, Smartness: 0.8
Wins: 2457, Losses: 4735, Draws: 2109
Episode: 9400, Win Rate: 0.26, Epsilon: 0.522, Smartness: 0.8
Wins: 2469, Losses: 4799, Draws: 2133
Episode: 9500, Win Rate: 0.26, Epsilon: 0.444, Smartness: 0.8
Wins: 2482, Losses: 4850, Draws: 2169
Episode: 9600, Win Rate: 0.26, Epsilon: 0.377, Smartness: 0.8
Wins: 2490, Losses: 4908, Draws: 2203
Episode: 9700, Win Rate: 0.26, Epsilon: 0.321, Smartness: 0.8
Wins: 2503, Losses: 4963, Draws: 2235
Episode: 9800, Win Rate: 0.26, Epsilon: 0.272, Smartness: 0.8
Wins: 2513, Losses: 5012, Draws: 2276
Episode: 9900, Win Rate: 0.26, Epsilon: 0.232, Smartness: 0.8
Wins: 2529, Losses: 5056, Draws: 2316




In [4]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.99):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.0005  # Reduced learning rate for more stable learning
        self.epsilon = 1.0
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.85
        self.batch_size = 64  # Increased batch size
        self.replay_buffer = deque(maxlen=50000)  # Increased buffer size
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_counter = 0
        self.update_target_frequency = 1000

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        
        # Prioritize winning moves and block opponent wins
        temp_board = np.array(state).copy()
        for action in valid_actions:
            # Check for winning move
            temp_board[action] = 2
            if self._check_win(temp_board, 2):
                return action
            temp_board[action] = 0
            
            # Check for blocking opponent win
            temp_board[action] = 1
            if self._check_win(temp_board, 1):
                return action
            temp_board[action] = 0

        # If no immediate winning/blocking moves, use Q-values
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def _check_win(self, board, player):
        win_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Columns
            [0, 4, 8], [2, 4, 6]  # Diagonals
        ]
        return any(all(board[i] == player for i in combo) for combo in win_combinations)

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Directly use the target network's Q-values
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.target_model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                target = reward + self.gamma * max(next_q_values[i])
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)
        
        # Update target network periodically
        self.update_target_counter += 1
        if self.update_target_counter >= self.update_target_frequency:
            self.target_model.set_weights(self.model.get_weights())
            self.update_target_counter = 0

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    best_win_rate = 0
    no_improvement_counter = 0
    smartness = 0
    for episode in range(episodes):
        # Decrease epsilon every 100 iterations
        if episode % 100 == 0 and episode % 1000 != 0:
            agent.epsilon = max(agent.epsilon_min, agent.epsilon * agent.epsilon_decay)

        # Increase smartness and partially reset epsilon every 2000 iterations
        if episode % 1000 == 0 and episode > 0:
            smartness = min(0.8, smartness + 0.1)
            agent.epsilon = 1  # Partial epsilon reset
            
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1 + (0.1 * smartness)  # Higher reward for winning against smarter opponent
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            if not game.is_full():
                game.player1_move()
                
            if game.current_winner == 1:
                reward = -1
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = -0.1 * smartness  # Small negative reward for draws against smart opponent
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        # Evaluation and model saving logic
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, Smartness: {smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
            
            if win_rate > best_win_rate:
                best_win_rate = win_rate
                no_improvement_counter = 0
            else:
                no_improvement_counter += 1
            
            if episode % 1000 == 0:
                agent.model.save(f'model10_episode_{episode}.h5')
        
    agent.model.save('model10.h5')
    return agent

agent = train_agent()




Episode: 0, Win Rate: 0.00, Epsilon: 1.000, Smartness: 0
Wins: 0, Losses: 0, Draws: 1
Episode: 100, Win Rate: 0.28, Epsilon: 0.850, Smartness: 0
Wins: 28, Losses: 57, Draws: 16
Episode: 200, Win Rate: 0.28, Epsilon: 0.722, Smartness: 0
Wins: 57, Losses: 108, Draws: 36
Episode: 300, Win Rate: 0.32, Epsilon: 0.614, Smartness: 0
Wins: 95, Losses: 157, Draws: 49
Episode: 400, Win Rate: 0.34, Epsilon: 0.522, Smartness: 0
Wins: 136, Losses: 198, Draws: 67
Episode: 500, Win Rate: 0.36, Epsilon: 0.444, Smartness: 0
Wins: 180, Losses: 238, Draws: 83
Episode: 600, Win Rate: 0.38, Epsilon: 0.377, Smartness: 0
Wins: 231, Losses: 270, Draws: 100
Episode: 700, Win Rate: 0.39, Epsilon: 0.321, Smartness: 0
Wins: 274, Losses: 301, Draws: 126
Episode: 800, Win Rate: 0.39, Epsilon: 0.272, Smartness: 0
Wins: 310, Losses: 336, Draws: 155
Episode: 900, Win Rate: 0.39, Epsilon: 0.232, Smartness: 0
Wins: 350, Losses: 367, Draws: 184




Episode: 1000, Win Rate: 0.39, Epsilon: 1.000, Smartness: 0.1
Wins: 393, Losses: 397, Draws: 211
Episode: 1100, Win Rate: 0.38, Epsilon: 0.850, Smartness: 0.1
Wins: 417, Losses: 466, Draws: 218
Episode: 1200, Win Rate: 0.38, Epsilon: 0.722, Smartness: 0.1
Wins: 452, Losses: 517, Draws: 232
Episode: 1300, Win Rate: 0.38, Epsilon: 0.614, Smartness: 0.1
Wins: 491, Losses: 563, Draws: 247
Episode: 1400, Win Rate: 0.38, Epsilon: 0.522, Smartness: 0.1
Wins: 534, Losses: 604, Draws: 263
Episode: 1500, Win Rate: 0.38, Epsilon: 0.444, Smartness: 0.1
Wins: 568, Losses: 651, Draws: 282
Episode: 1600, Win Rate: 0.39, Epsilon: 0.377, Smartness: 0.1
Wins: 617, Losses: 679, Draws: 305
Episode: 1700, Win Rate: 0.39, Epsilon: 0.321, Smartness: 0.1
Wins: 656, Losses: 712, Draws: 333
Episode: 1800, Win Rate: 0.39, Epsilon: 0.272, Smartness: 0.1
Wins: 710, Losses: 736, Draws: 355
Episode: 1900, Win Rate: 0.41, Epsilon: 0.232, Smartness: 0.1
Wins: 773, Losses: 758, Draws: 370




Episode: 2000, Win Rate: 0.42, Epsilon: 1.000, Smartness: 0.2
Wins: 835, Losses: 775, Draws: 391
Episode: 2100, Win Rate: 0.41, Epsilon: 0.850, Smartness: 0.2
Wins: 857, Losses: 846, Draws: 398
Episode: 2200, Win Rate: 0.40, Epsilon: 0.722, Smartness: 0.2
Wins: 882, Losses: 907, Draws: 412
Episode: 2300, Win Rate: 0.40, Epsilon: 0.614, Smartness: 0.2
Wins: 922, Losses: 952, Draws: 427
Episode: 2400, Win Rate: 0.40, Epsilon: 0.522, Smartness: 0.2
Wins: 963, Losses: 990, Draws: 448
Episode: 2500, Win Rate: 0.40, Epsilon: 0.444, Smartness: 0.2
Wins: 997, Losses: 1032, Draws: 472
Episode: 2600, Win Rate: 0.40, Epsilon: 0.377, Smartness: 0.2
Wins: 1040, Losses: 1068, Draws: 493
Episode: 2700, Win Rate: 0.40, Epsilon: 0.321, Smartness: 0.2
Wins: 1083, Losses: 1096, Draws: 522
Episode: 2800, Win Rate: 0.40, Epsilon: 0.272, Smartness: 0.2
Wins: 1128, Losses: 1125, Draws: 548
Episode: 2900, Win Rate: 0.41, Epsilon: 0.232, Smartness: 0.2
Wins: 1177, Losses: 1146, Draws: 578




Episode: 3000, Win Rate: 0.41, Epsilon: 1.000, Smartness: 0.30000000000000004
Wins: 1228, Losses: 1171, Draws: 602
Episode: 3100, Win Rate: 0.40, Epsilon: 0.850, Smartness: 0.30000000000000004
Wins: 1254, Losses: 1234, Draws: 613
Episode: 3200, Win Rate: 0.40, Epsilon: 0.722, Smartness: 0.30000000000000004
Wins: 1269, Losses: 1300, Draws: 632
Episode: 3300, Win Rate: 0.40, Epsilon: 0.614, Smartness: 0.30000000000000004
Wins: 1304, Losses: 1345, Draws: 652
Episode: 3400, Win Rate: 0.39, Epsilon: 0.522, Smartness: 0.30000000000000004
Wins: 1334, Losses: 1396, Draws: 671
Episode: 3500, Win Rate: 0.39, Epsilon: 0.444, Smartness: 0.30000000000000004
Wins: 1363, Losses: 1451, Draws: 687
Episode: 3600, Win Rate: 0.39, Epsilon: 0.377, Smartness: 0.30000000000000004
Wins: 1402, Losses: 1489, Draws: 710
Episode: 3700, Win Rate: 0.39, Epsilon: 0.321, Smartness: 0.30000000000000004
Wins: 1440, Losses: 1532, Draws: 729
Episode: 3800, Win Rate: 0.39, Epsilon: 0.272, Smartness: 0.30000000000000004
Wi



Episode: 4000, Win Rate: 0.39, Epsilon: 1.000, Smartness: 0.4
Wins: 1575, Losses: 1607, Draws: 819
Episode: 4100, Win Rate: 0.39, Epsilon: 0.850, Smartness: 0.4
Wins: 1589, Losses: 1682, Draws: 830
Episode: 4200, Win Rate: 0.38, Epsilon: 0.722, Smartness: 0.4
Wins: 1610, Losses: 1741, Draws: 850
Episode: 4300, Win Rate: 0.38, Epsilon: 0.614, Smartness: 0.4
Wins: 1636, Losses: 1797, Draws: 868
Episode: 4400, Win Rate: 0.38, Epsilon: 0.522, Smartness: 0.4
Wins: 1663, Losses: 1847, Draws: 891
Episode: 4500, Win Rate: 0.38, Epsilon: 0.444, Smartness: 0.4
Wins: 1699, Losses: 1891, Draws: 911
Episode: 4600, Win Rate: 0.38, Epsilon: 0.377, Smartness: 0.4
Wins: 1732, Losses: 1934, Draws: 935
Episode: 4700, Win Rate: 0.38, Epsilon: 0.321, Smartness: 0.4
Wins: 1763, Losses: 1970, Draws: 968
Episode: 4800, Win Rate: 0.38, Epsilon: 0.272, Smartness: 0.4
Wins: 1803, Losses: 2008, Draws: 990
Episode: 4900, Win Rate: 0.38, Epsilon: 0.232, Smartness: 0.4
Wins: 1840, Losses: 2042, Draws: 1019




Episode: 5000, Win Rate: 0.38, Epsilon: 1.000, Smartness: 0.5
Wins: 1883, Losses: 2069, Draws: 1049
Episode: 5100, Win Rate: 0.37, Epsilon: 0.850, Smartness: 0.5
Wins: 1893, Losses: 2148, Draws: 1060
Episode: 5200, Win Rate: 0.37, Epsilon: 0.722, Smartness: 0.5
Wins: 1907, Losses: 2221, Draws: 1073
Episode: 5300, Win Rate: 0.36, Epsilon: 0.614, Smartness: 0.5
Wins: 1920, Losses: 2296, Draws: 1085
Episode: 5400, Win Rate: 0.36, Epsilon: 0.522, Smartness: 0.5
Wins: 1946, Losses: 2344, Draws: 1111
Episode: 5500, Win Rate: 0.36, Epsilon: 0.444, Smartness: 0.5
Wins: 1971, Losses: 2398, Draws: 1132
Episode: 5600, Win Rate: 0.36, Epsilon: 0.377, Smartness: 0.5
Wins: 1998, Losses: 2448, Draws: 1155
Episode: 5700, Win Rate: 0.36, Epsilon: 0.321, Smartness: 0.5
Wins: 2025, Losses: 2498, Draws: 1178
Episode: 5800, Win Rate: 0.35, Epsilon: 0.272, Smartness: 0.5
Wins: 2058, Losses: 2539, Draws: 1204
Episode: 5900, Win Rate: 0.35, Epsilon: 0.232, Smartness: 0.5
Wins: 2088, Losses: 2573, Draws: 1240




Episode: 6000, Win Rate: 0.35, Epsilon: 1.000, Smartness: 0.6
Wins: 2122, Losses: 2608, Draws: 1271
Episode: 6100, Win Rate: 0.35, Epsilon: 0.850, Smartness: 0.6
Wins: 2134, Losses: 2685, Draws: 1282
Episode: 6200, Win Rate: 0.35, Epsilon: 0.722, Smartness: 0.6
Wins: 2145, Losses: 2760, Draws: 1296
Episode: 6300, Win Rate: 0.34, Epsilon: 0.614, Smartness: 0.6
Wins: 2162, Losses: 2824, Draws: 1315
Episode: 6400, Win Rate: 0.34, Epsilon: 0.522, Smartness: 0.6
Wins: 2177, Losses: 2890, Draws: 1334
Episode: 6500, Win Rate: 0.34, Epsilon: 0.444, Smartness: 0.6
Wins: 2202, Losses: 2941, Draws: 1358
Episode: 6600, Win Rate: 0.34, Epsilon: 0.377, Smartness: 0.6
Wins: 2232, Losses: 2988, Draws: 1381
Episode: 6700, Win Rate: 0.34, Epsilon: 0.321, Smartness: 0.6
Wins: 2259, Losses: 3037, Draws: 1405
Episode: 6800, Win Rate: 0.34, Epsilon: 0.272, Smartness: 0.6
Wins: 2281, Losses: 3091, Draws: 1429
Episode: 6900, Win Rate: 0.33, Epsilon: 0.232, Smartness: 0.6
Wins: 2307, Losses: 3138, Draws: 1456




Episode: 7000, Win Rate: 0.33, Epsilon: 1.000, Smartness: 0.7
Wins: 2337, Losses: 3175, Draws: 1489
Episode: 7100, Win Rate: 0.33, Epsilon: 0.850, Smartness: 0.7
Wins: 2346, Losses: 3252, Draws: 1503
Episode: 7200, Win Rate: 0.33, Epsilon: 0.722, Smartness: 0.7
Wins: 2360, Losses: 3324, Draws: 1517
Episode: 7300, Win Rate: 0.32, Epsilon: 0.614, Smartness: 0.7
Wins: 2370, Losses: 3394, Draws: 1537
Episode: 7400, Win Rate: 0.32, Epsilon: 0.522, Smartness: 0.7
Wins: 2379, Losses: 3467, Draws: 1555
Episode: 7500, Win Rate: 0.32, Epsilon: 0.444, Smartness: 0.7
Wins: 2397, Losses: 3531, Draws: 1573
Episode: 7600, Win Rate: 0.32, Epsilon: 0.377, Smartness: 0.7
Wins: 2412, Losses: 3586, Draws: 1603
Episode: 7700, Win Rate: 0.32, Epsilon: 0.321, Smartness: 0.7
Wins: 2428, Losses: 3634, Draws: 1639
Episode: 7800, Win Rate: 0.31, Epsilon: 0.272, Smartness: 0.7
Wins: 2447, Losses: 3678, Draws: 1676
Episode: 7900, Win Rate: 0.31, Epsilon: 0.232, Smartness: 0.7
Wins: 2473, Losses: 3721, Draws: 1707




Episode: 8000, Win Rate: 0.31, Epsilon: 1.000, Smartness: 0.7999999999999999
Wins: 2493, Losses: 3767, Draws: 1741
Episode: 8100, Win Rate: 0.31, Epsilon: 0.850, Smartness: 0.7999999999999999
Wins: 2499, Losses: 3855, Draws: 1747
Episode: 8200, Win Rate: 0.31, Epsilon: 0.722, Smartness: 0.7999999999999999
Wins: 2507, Losses: 3934, Draws: 1760
Episode: 8300, Win Rate: 0.30, Epsilon: 0.614, Smartness: 0.7999999999999999
Wins: 2516, Losses: 4002, Draws: 1783
Episode: 8400, Win Rate: 0.30, Epsilon: 0.522, Smartness: 0.7999999999999999
Wins: 2527, Losses: 4070, Draws: 1804
Episode: 8500, Win Rate: 0.30, Epsilon: 0.444, Smartness: 0.7999999999999999
Wins: 2538, Losses: 4136, Draws: 1827
Episode: 8600, Win Rate: 0.30, Epsilon: 0.377, Smartness: 0.7999999999999999
Wins: 2545, Losses: 4201, Draws: 1855
Episode: 8700, Win Rate: 0.29, Epsilon: 0.321, Smartness: 0.7999999999999999
Wins: 2560, Losses: 4252, Draws: 1889
Episode: 8800, Win Rate: 0.29, Epsilon: 0.272, Smartness: 0.7999999999999999
Win



Episode: 9000, Win Rate: 0.29, Epsilon: 1.000, Smartness: 0.8
Wins: 2609, Losses: 4394, Draws: 1998
Episode: 9100, Win Rate: 0.29, Epsilon: 0.850, Smartness: 0.8
Wins: 2612, Losses: 4481, Draws: 2008
Episode: 9200, Win Rate: 0.29, Epsilon: 0.722, Smartness: 0.8
Wins: 2625, Losses: 4554, Draws: 2022
Episode: 9300, Win Rate: 0.28, Epsilon: 0.614, Smartness: 0.8
Wins: 2633, Losses: 4632, Draws: 2036
Episode: 9400, Win Rate: 0.28, Epsilon: 0.522, Smartness: 0.8
Wins: 2645, Losses: 4698, Draws: 2058
Episode: 9500, Win Rate: 0.28, Epsilon: 0.444, Smartness: 0.8
Wins: 2657, Losses: 4759, Draws: 2085
Episode: 9600, Win Rate: 0.28, Epsilon: 0.377, Smartness: 0.8
Wins: 2666, Losses: 4822, Draws: 2113
Episode: 9700, Win Rate: 0.28, Epsilon: 0.321, Smartness: 0.8
Wins: 2674, Losses: 4873, Draws: 2154
Episode: 9800, Win Rate: 0.27, Epsilon: 0.272, Smartness: 0.8
Wins: 2688, Losses: 4923, Draws: 2190
Episode: 9900, Win Rate: 0.27, Epsilon: 0.232, Smartness: 0.8
Wins: 2706, Losses: 4971, Draws: 2224




In [8]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=10000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness=0
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 1000) * epsilon_decay)
        if episode%1000==0 and episode>0:
            smartness = min(0.8, episode / (episodes * 0.9))
            
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0+smartness
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0-smartness
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1,-0.5*smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model11_episode_{episode}.h5')
    agent.model.save('model11.h5')
agent = train_agent()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.27, Epsilon: 0.901, smartmove0
Wins: 27, Losses: 56, Draws: 18
Episode: 200, Win Rate: 0.27, Epsilon: 0.802, smartmove0
Wins: 55, Losses: 113, Draws: 33
Episode: 300, Win Rate: 0.27, Epsilon: 0.703, smartmove0
Wins: 81, Losses: 169, Draws: 51
Episode: 400, Win Rate: 0.29, Epsilon: 0.604, smartmove0
Wins: 116, Losses: 222, Draws: 63
Episode: 500, Win Rate: 0.30, Epsilon: 0.505, smartmove0
Wins: 149, Losses: 281, Draws: 71
Episode: 600, Win Rate: 0.30, Epsilon: 0.406, smartmove0
Wins: 183, Losses: 341, Draws: 77
Episode: 700, Win Rate: 0.31, Epsilon: 0.307, smartmove0
Wins: 216, Losses: 398, Draws: 87
Episode: 800, Win Rate: 0.32, Epsilon: 0.208, smartmove0
Wins: 255, Losses: 451, Draws: 95
Episode: 900, Win Rate: 0.32, Epsilon: 0.109, smartmove0
Wins: 291, Losses: 505, Draws: 105




Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.32, Epsilon: 1.000, smartmove0.1111111111111111
Wins: 323, Losses: 565, Draws: 113
Episode: 1100, Win Rate: 0.32, Epsilon: 0.901, smartmove0.1111111111111111
Wins: 348, Losses: 627, Draws: 126
Episode: 1200, Win Rate: 0.31, Epsilon: 0.802, smartmove0.1111111111111111
Wins: 377, Losses: 691, Draws: 133
Episode: 1300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.1111111111111111
Wins: 407, Losses: 749, Draws: 145
Episode: 1400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.1111111111111111
Wins: 431, Losses: 816, Draws: 154
Episode: 1500, Win Rate: 0.31, Epsilon: 0.505, smartmove0.1111111111111111
Wins: 463, Losses: 873, Draws: 165
Episode: 1600, Win Rate: 0.32, Epsilon: 0.406, smartmove0.1111111111111111
Wins: 505, Losses: 920, Draws: 176
Episode: 1700, Win Rate: 0.32, Epsilon: 0.307, smartmove0.1111111111111111
Wins: 543, Losses: 973, Draws: 185
Episode: 1800, Win Rate: 0.32, Epsilon: 0.208, smartmove0.1111111111111111
Wins: 579, Losses: 1



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.33, Epsilon: 1.000, smartmove0.2222222222222222
Wins: 659, Losses: 1132, Draws: 210
Episode: 2100, Win Rate: 0.32, Epsilon: 0.901, smartmove0.2222222222222222
Wins: 678, Losses: 1202, Draws: 221
Episode: 2200, Win Rate: 0.32, Epsilon: 0.802, smartmove0.2222222222222222
Wins: 698, Losses: 1269, Draws: 234
Episode: 2300, Win Rate: 0.32, Epsilon: 0.703, smartmove0.2222222222222222
Wins: 730, Losses: 1328, Draws: 243
Episode: 2400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.2222222222222222
Wins: 752, Losses: 1394, Draws: 255
Episode: 2500, Win Rate: 0.31, Epsilon: 0.505, smartmove0.2222222222222222
Wins: 784, Losses: 1455, Draws: 262
Episode: 2600, Win Rate: 0.31, Epsilon: 0.406, smartmove0.2222222222222222
Wins: 818, Losses: 1512, Draws: 271
Episode: 2700, Win Rate: 0.32, Epsilon: 0.307, smartmove0.2222222222222222
Wins: 858, Losses: 1568, Draws: 275
Episode: 2800, Win Rate: 0.32, Epsilon: 0.208, smartmove0.2222222222222222
Wins: 884, L



Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.32, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 947, Losses: 1751, Draws: 303
Episode: 3100, Win Rate: 0.31, Epsilon: 0.901, smartmove0.3333333333333333
Wins: 972, Losses: 1813, Draws: 316
Episode: 3200, Win Rate: 0.31, Epsilon: 0.802, smartmove0.3333333333333333
Wins: 987, Losses: 1889, Draws: 325
Episode: 3300, Win Rate: 0.30, Epsilon: 0.703, smartmove0.3333333333333333
Wins: 1005, Losses: 1957, Draws: 339
Episode: 3400, Win Rate: 0.30, Epsilon: 0.604, smartmove0.3333333333333333
Wins: 1032, Losses: 2018, Draws: 351
Episode: 3500, Win Rate: 0.30, Epsilon: 0.505, smartmove0.3333333333333333
Wins: 1052, Losses: 2086, Draws: 363
Episode: 3600, Win Rate: 0.30, Epsilon: 0.406, smartmove0.3333333333333333
Wins: 1083, Losses: 2146, Draws: 372
Episode: 3700, Win Rate: 0.30, Epsilon: 0.307, smartmove0.3333333333333333
Wins: 1116, Losses: 2203, Draws: 382
Episode: 3800, Win Rate: 0.30, Epsilon: 0.208, smartmove0.3333333333333333
Wins: 1



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.30, Epsilon: 1.000, smartmove0.4444444444444444
Wins: 1201, Losses: 2391, Draws: 409
Episode: 4100, Win Rate: 0.30, Epsilon: 0.901, smartmove0.4444444444444444
Wins: 1214, Losses: 2467, Draws: 420
Episode: 4200, Win Rate: 0.29, Epsilon: 0.802, smartmove0.4444444444444444
Wins: 1225, Losses: 2537, Draws: 439
Episode: 4300, Win Rate: 0.29, Epsilon: 0.703, smartmove0.4444444444444444
Wins: 1248, Losses: 2605, Draws: 448
Episode: 4400, Win Rate: 0.29, Epsilon: 0.604, smartmove0.4444444444444444
Wins: 1262, Losses: 2682, Draws: 457
Episode: 4500, Win Rate: 0.29, Epsilon: 0.505, smartmove0.4444444444444444
Wins: 1294, Losses: 2742, Draws: 465
Episode: 4600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.4444444444444444
Wins: 1320, Losses: 2806, Draws: 475
Episode: 4700, Win Rate: 0.29, Epsilon: 0.307, smartmove0.4444444444444444
Wins: 1341, Losses: 2871, Draws: 489
Episode: 4800, Win Rate: 0.28, Epsilon: 0.208, smartmove0.4444444444444444
Wins



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.5555555555555556
Wins: 1410, Losses: 3066, Draws: 525
Episode: 5100, Win Rate: 0.28, Epsilon: 0.901, smartmove0.5555555555555556
Wins: 1421, Losses: 3144, Draws: 536
Episode: 5200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.5555555555555556
Wins: 1435, Losses: 3224, Draws: 542
Episode: 5300, Win Rate: 0.27, Epsilon: 0.703, smartmove0.5555555555555556
Wins: 1445, Losses: 3296, Draws: 560
Episode: 5400, Win Rate: 0.27, Epsilon: 0.604, smartmove0.5555555555555556
Wins: 1460, Losses: 3369, Draws: 572
Episode: 5500, Win Rate: 0.27, Epsilon: 0.505, smartmove0.5555555555555556
Wins: 1481, Losses: 3433, Draws: 587
Episode: 5600, Win Rate: 0.27, Epsilon: 0.406, smartmove0.5555555555555556
Wins: 1499, Losses: 3506, Draws: 596
Episode: 5700, Win Rate: 0.27, Epsilon: 0.307, smartmove0.5555555555555556
Wins: 1520, Losses: 3574, Draws: 607
Episode: 5800, Win Rate: 0.27, Epsilon: 0.208, smartmove0.5555555555555556
Wins



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.27, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 1592, Losses: 3763, Draws: 646
Episode: 6100, Win Rate: 0.26, Epsilon: 0.901, smartmove0.6666666666666666
Wins: 1599, Losses: 3845, Draws: 657
Episode: 6200, Win Rate: 0.26, Epsilon: 0.802, smartmove0.6666666666666666
Wins: 1609, Losses: 3926, Draws: 666
Episode: 6300, Win Rate: 0.26, Epsilon: 0.703, smartmove0.6666666666666666
Wins: 1617, Losses: 4007, Draws: 677
Episode: 6400, Win Rate: 0.25, Epsilon: 0.604, smartmove0.6666666666666666
Wins: 1628, Losses: 4080, Draws: 693
Episode: 6500, Win Rate: 0.25, Epsilon: 0.505, smartmove0.6666666666666666
Wins: 1650, Losses: 4148, Draws: 703
Episode: 6600, Win Rate: 0.25, Epsilon: 0.406, smartmove0.6666666666666666
Wins: 1664, Losses: 4222, Draws: 715
Episode: 6700, Win Rate: 0.25, Epsilon: 0.307, smartmove0.6666666666666666
Wins: 1683, Losses: 4295, Draws: 723
Episode: 6800, Win Rate: 0.25, Epsilon: 0.208, smartmove0.6666666666666666
Wins



Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.25, Epsilon: 1.000, smartmove0.7777777777777778
Wins: 1745, Losses: 4472, Draws: 784
Episode: 7100, Win Rate: 0.25, Epsilon: 0.901, smartmove0.7777777777777778
Wins: 1748, Losses: 4561, Draws: 792
Episode: 7200, Win Rate: 0.24, Epsilon: 0.802, smartmove0.7777777777777778
Wins: 1762, Losses: 4634, Draws: 805
Episode: 7300, Win Rate: 0.24, Epsilon: 0.703, smartmove0.7777777777777778
Wins: 1768, Losses: 4713, Draws: 820
Episode: 7400, Win Rate: 0.24, Epsilon: 0.604, smartmove0.7777777777777778
Wins: 1771, Losses: 4797, Draws: 833
Episode: 7500, Win Rate: 0.24, Epsilon: 0.505, smartmove0.7777777777777778
Wins: 1784, Losses: 4869, Draws: 848
Episode: 7600, Win Rate: 0.24, Epsilon: 0.406, smartmove0.7777777777777778
Wins: 1794, Losses: 4943, Draws: 864
Episode: 7700, Win Rate: 0.23, Epsilon: 0.307, smartmove0.7777777777777778
Wins: 1808, Losses: 5015, Draws: 878
Episode: 7800, Win Rate: 0.23, Epsilon: 0.208, smartmove0.7777777777777778
Wins



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.23, Epsilon: 1.000, smartmove0.8
Wins: 1852, Losses: 5206, Draws: 943
Episode: 8100, Win Rate: 0.23, Epsilon: 0.901, smartmove0.8
Wins: 1859, Losses: 5291, Draws: 951
Episode: 8200, Win Rate: 0.23, Epsilon: 0.802, smartmove0.8
Wins: 1870, Losses: 5371, Draws: 960
Episode: 8300, Win Rate: 0.23, Epsilon: 0.703, smartmove0.8
Wins: 1884, Losses: 5447, Draws: 970
Episode: 8400, Win Rate: 0.22, Epsilon: 0.604, smartmove0.8
Wins: 1890, Losses: 5525, Draws: 986
Episode: 8500, Win Rate: 0.22, Epsilon: 0.505, smartmove0.8
Wins: 1901, Losses: 5596, Draws: 1004
Episode: 8600, Win Rate: 0.22, Epsilon: 0.406, smartmove0.8
Wins: 1913, Losses: 5670, Draws: 1018
Episode: 8700, Win Rate: 0.22, Epsilon: 0.307, smartmove0.8
Wins: 1925, Losses: 5739, Draws: 1037
Episode: 8800, Win Rate: 0.22, Epsilon: 0.208, smartmove0.8
Wins: 1931, Losses: 5815, Draws: 1055
Episode: 8900, Win Rate: 0.22, Epsilon: 0.109, smartmove0.8
Wins: 1949, Losses: 5876, Draws: 1076




Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.22, Epsilon: 1.000, smartmove0.8
Wins: 1964, Losses: 5937, Draws: 1100
Episode: 9100, Win Rate: 0.22, Epsilon: 0.901, smartmove0.8
Wins: 1968, Losses: 6025, Draws: 1108
Episode: 9200, Win Rate: 0.21, Epsilon: 0.802, smartmove0.8
Wins: 1975, Losses: 6105, Draws: 1121
Episode: 9300, Win Rate: 0.21, Epsilon: 0.703, smartmove0.8
Wins: 1981, Losses: 6183, Draws: 1137
Episode: 9400, Win Rate: 0.21, Epsilon: 0.604, smartmove0.8
Wins: 1991, Losses: 6254, Draws: 1156
Episode: 9500, Win Rate: 0.21, Epsilon: 0.505, smartmove0.8
Wins: 1996, Losses: 6332, Draws: 1173
Episode: 9600, Win Rate: 0.21, Epsilon: 0.406, smartmove0.8
Wins: 2008, Losses: 6403, Draws: 1190
Episode: 9700, Win Rate: 0.21, Epsilon: 0.307, smartmove0.8
Wins: 2021, Losses: 6471, Draws: 1209
Episode: 9800, Win Rate: 0.21, Epsilon: 0.208, smartmove0.8
Wins: 2037, Losses: 6528, Draws: 1236
Episode: 9900, Win Rate: 0.21, Epsilon: 0.109, smartmove0.8
Wins: 2056, Losses: 6589, Draws: 



In [12]:
import numpy as np
import random
from collections import deque
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from TicTacToe import TicTacToe
import os

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, model_path=None):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        
        if model_path and os.path.isfile(model_path):
            print(f"Loading model from {model_path}")
            self.model = load_model(model_path)
        else:
            print("No existing model found; initializing a new model.")
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=32, epochs=1, verbose=0)

def train_agent(episodes=20000):
    agent = SQNAgent()
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness=0
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        if episode%2000==0 and episode>0:
            smartness = min(0.8, episode / (episodes * 0.9))
            
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0+smartness
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0-smartness
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1,-0.5*smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove{smartness}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            agent.model.save(f'model11_retrain_episode_{episode}.h5')
    agent.model.save('model11_retrain.h5')
agent = train_agent()




No existing model found; initializing a new model.
Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove0
Wins: 0, Losses: 1, Draws: 0
Episode: 100, Win Rate: 0.30, Epsilon: 0.901, smartmove0
Wins: 30, Losses: 61, Draws: 10
Episode: 200, Win Rate: 0.27, Epsilon: 0.802, smartmove0
Wins: 54, Losses: 121, Draws: 26
Episode: 300, Win Rate: 0.28, Epsilon: 0.703, smartmove0
Wins: 85, Losses: 180, Draws: 36
Episode: 400, Win Rate: 0.30, Epsilon: 0.604, smartmove0
Wins: 122, Losses: 235, Draws: 44
Episode: 500, Win Rate: 0.32, Epsilon: 0.505, smartmove0
Wins: 159, Losses: 284, Draws: 58
Episode: 600, Win Rate: 0.31, Epsilon: 0.406, smartmove0
Wins: 185, Losses: 347, Draws: 69
Episode: 700, Win Rate: 0.32, Epsilon: 0.307, smartmove0
Wins: 223, Losses: 402, Draws: 76
Episode: 800, Win Rate: 0.32, Epsilon: 0.208, smartmove0
Wins: 258, Losses: 459, Draws: 84
Episode: 900, Win Rate: 0.33, Epsilon: 0.109, smartmove0
Wins: 299, Losses: 508, Draws: 94




Episode: 1000, Win Rate: 0.34, Epsilon: 0.010, smartmove0
Wins: 339, Losses: 561, Draws: 101
Episode: 1100, Win Rate: 0.34, Epsilon: 0.010, smartmove0
Wins: 377, Losses: 614, Draws: 110
Episode: 1200, Win Rate: 0.35, Epsilon: 0.010, smartmove0
Wins: 419, Losses: 666, Draws: 116
Episode: 1300, Win Rate: 0.36, Epsilon: 0.010, smartmove0
Wins: 463, Losses: 713, Draws: 125
Episode: 1400, Win Rate: 0.36, Epsilon: 0.010, smartmove0
Wins: 503, Losses: 765, Draws: 133
Episode: 1500, Win Rate: 0.37, Epsilon: 0.010, smartmove0
Wins: 549, Losses: 811, Draws: 141
Episode: 1600, Win Rate: 0.37, Epsilon: 0.010, smartmove0
Wins: 592, Losses: 858, Draws: 151
Episode: 1700, Win Rate: 0.37, Epsilon: 0.010, smartmove0
Wins: 621, Losses: 918, Draws: 162
Episode: 1800, Win Rate: 0.36, Epsilon: 0.010, smartmove0
Wins: 652, Losses: 981, Draws: 168
Episode: 1900, Win Rate: 0.36, Epsilon: 0.010, smartmove0
Wins: 690, Losses: 1031, Draws: 180




Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.36, Epsilon: 1.000, smartmove0.1111111111111111
Wins: 727, Losses: 1081, Draws: 193
Episode: 2100, Win Rate: 0.35, Epsilon: 0.901, smartmove0.1111111111111111
Wins: 744, Losses: 1146, Draws: 211
Episode: 2200, Win Rate: 0.35, Epsilon: 0.802, smartmove0.1111111111111111
Wins: 773, Losses: 1206, Draws: 222
Episode: 2300, Win Rate: 0.35, Epsilon: 0.703, smartmove0.1111111111111111
Wins: 801, Losses: 1265, Draws: 235
Episode: 2400, Win Rate: 0.35, Epsilon: 0.604, smartmove0.1111111111111111
Wins: 831, Losses: 1326, Draws: 244
Episode: 2500, Win Rate: 0.34, Epsilon: 0.505, smartmove0.1111111111111111
Wins: 862, Losses: 1386, Draws: 253
Episode: 2600, Win Rate: 0.34, Epsilon: 0.406, smartmove0.1111111111111111
Wins: 888, Losses: 1451, Draws: 262
Episode: 2700, Win Rate: 0.34, Epsilon: 0.307, smartmove0.1111111111111111
Wins: 922, Losses: 1500, Draws: 279
Episode: 2800, Win Rate: 0.34, Epsilon: 0.208, smartmove0.1111111111111111
Wins: 954, L



Episode: 3000, Win Rate: 0.34, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1028, Losses: 1647, Draws: 326
Episode: 3100, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1074, Losses: 1691, Draws: 336
Episode: 3200, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1110, Losses: 1743, Draws: 348
Episode: 3300, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1153, Losses: 1790, Draws: 358
Episode: 3400, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1196, Losses: 1831, Draws: 374
Episode: 3500, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1237, Losses: 1873, Draws: 391
Episode: 3600, Win Rate: 0.36, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1281, Losses: 1917, Draws: 403
Episode: 3700, Win Rate: 0.35, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1312, Losses: 1967, Draws: 422
Episode: 3800, Win Rate: 0.36, Epsilon: 0.010, smartmove0.1111111111111111
Wins: 1359, Losses: 2012, Dra



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.36, Epsilon: 1.000, smartmove0.2222222222222222
Wins: 1431, Losses: 2111, Draws: 459
Episode: 4100, Win Rate: 0.35, Epsilon: 0.901, smartmove0.2222222222222222
Wins: 1450, Losses: 2181, Draws: 470
Episode: 4200, Win Rate: 0.35, Epsilon: 0.802, smartmove0.2222222222222222
Wins: 1468, Losses: 2247, Draws: 486
Episode: 4300, Win Rate: 0.35, Epsilon: 0.703, smartmove0.2222222222222222
Wins: 1496, Losses: 2308, Draws: 497
Episode: 4400, Win Rate: 0.34, Epsilon: 0.604, smartmove0.2222222222222222
Wins: 1518, Losses: 2379, Draws: 504
Episode: 4500, Win Rate: 0.34, Epsilon: 0.505, smartmove0.2222222222222222
Wins: 1547, Losses: 2439, Draws: 515
Episode: 4600, Win Rate: 0.34, Epsilon: 0.406, smartmove0.2222222222222222
Wins: 1570, Losses: 2498, Draws: 533
Episode: 4700, Win Rate: 0.34, Epsilon: 0.307, smartmove0.2222222222222222
Wins: 1594, Losses: 2565, Draws: 542
Episode: 4800, Win Rate: 0.34, Epsilon: 0.208, smartmove0.2222222222222222
Wins



Episode: 5000, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1677, Losses: 2734, Draws: 590
Episode: 5100, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1712, Losses: 2777, Draws: 612
Episode: 5200, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1749, Losses: 2823, Draws: 629
Episode: 5300, Win Rate: 0.33, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1773, Losses: 2876, Draws: 652
Episode: 5400, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1814, Losses: 2923, Draws: 664
Episode: 5500, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1859, Losses: 2961, Draws: 681
Episode: 5600, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1894, Losses: 2999, Draws: 708
Episode: 5700, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1930, Losses: 3048, Draws: 723
Episode: 5800, Win Rate: 0.34, Epsilon: 0.010, smartmove0.2222222222222222
Wins: 1971, Losses: 3090, Dra



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.34, Epsilon: 1.000, smartmove0.3333333333333333
Wins: 2041, Losses: 3185, Draws: 775
Episode: 6100, Win Rate: 0.34, Epsilon: 0.901, smartmove0.3333333333333333
Wins: 2058, Losses: 3253, Draws: 790
Episode: 6200, Win Rate: 0.34, Epsilon: 0.802, smartmove0.3333333333333333
Wins: 2087, Losses: 3307, Draws: 807
Episode: 6300, Win Rate: 0.33, Epsilon: 0.703, smartmove0.3333333333333333
Wins: 2106, Losses: 3378, Draws: 817
Episode: 6400, Win Rate: 0.33, Epsilon: 0.604, smartmove0.3333333333333333
Wins: 2133, Losses: 3435, Draws: 833
Episode: 6500, Win Rate: 0.33, Epsilon: 0.505, smartmove0.3333333333333333
Wins: 2164, Losses: 3489, Draws: 848
Episode: 6600, Win Rate: 0.33, Epsilon: 0.406, smartmove0.3333333333333333
Wins: 2190, Losses: 3544, Draws: 867
Episode: 6700, Win Rate: 0.33, Epsilon: 0.307, smartmove0.3333333333333333
Wins: 2219, Losses: 3592, Draws: 890
Episode: 6800, Win Rate: 0.33, Epsilon: 0.208, smartmove0.3333333333333333
Wins



Episode: 7000, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2313, Losses: 3748, Draws: 940
Episode: 7100, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2344, Losses: 3798, Draws: 959
Episode: 7200, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2384, Losses: 3844, Draws: 973
Episode: 7300, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2408, Losses: 3901, Draws: 992
Episode: 7400, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2442, Losses: 3947, Draws: 1012
Episode: 7500, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2479, Losses: 3991, Draws: 1031
Episode: 7600, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2508, Losses: 4042, Draws: 1051
Episode: 7700, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2532, Losses: 4099, Draws: 1070
Episode: 7800, Win Rate: 0.33, Epsilon: 0.010, smartmove0.3333333333333333
Wins: 2558, Losses: 4149,



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.33, Epsilon: 1.000, smartmove0.4444444444444444
Wins: 2617, Losses: 4253, Draws: 1131
Episode: 8100, Win Rate: 0.33, Epsilon: 0.901, smartmove0.4444444444444444
Wins: 2633, Losses: 4325, Draws: 1143
Episode: 8200, Win Rate: 0.32, Epsilon: 0.802, smartmove0.4444444444444444
Wins: 2651, Losses: 4389, Draws: 1161
Episode: 8300, Win Rate: 0.32, Epsilon: 0.703, smartmove0.4444444444444444
Wins: 2680, Losses: 4452, Draws: 1169
Episode: 8400, Win Rate: 0.32, Epsilon: 0.604, smartmove0.4444444444444444
Wins: 2700, Losses: 4522, Draws: 1179
Episode: 8500, Win Rate: 0.32, Epsilon: 0.505, smartmove0.4444444444444444
Wins: 2724, Losses: 4586, Draws: 1191
Episode: 8600, Win Rate: 0.32, Epsilon: 0.406, smartmove0.4444444444444444
Wins: 2758, Losses: 4639, Draws: 1204
Episode: 8700, Win Rate: 0.32, Epsilon: 0.307, smartmove0.4444444444444444
Wins: 2775, Losses: 4705, Draws: 1221
Episode: 8800, Win Rate: 0.32, Epsilon: 0.208, smartmove0.4444444444444



Episode: 9000, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 2849, Losses: 4877, Draws: 1275
Episode: 9100, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 2887, Losses: 4920, Draws: 1294
Episode: 9200, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 2928, Losses: 4967, Draws: 1306
Episode: 9300, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 2952, Losses: 5014, Draws: 1335
Episode: 9400, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 2983, Losses: 5058, Draws: 1360
Episode: 9500, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 3009, Losses: 5113, Draws: 1379
Episode: 9600, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 3039, Losses: 5163, Draws: 1399
Episode: 9700, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 3076, Losses: 5200, Draws: 1425
Episode: 9800, Win Rate: 0.32, Epsilon: 0.010, smartmove0.4444444444444444
Wins: 3099, Losses: 5



Resetting epsilon to 1.0
Episode: 10000, Win Rate: 0.32, Epsilon: 1.000, smartmove0.5555555555555556
Wins: 3157, Losses: 5360, Draws: 1484
Episode: 10100, Win Rate: 0.31, Epsilon: 0.901, smartmove0.5555555555555556
Wins: 3169, Losses: 5436, Draws: 1496
Episode: 10200, Win Rate: 0.31, Epsilon: 0.802, smartmove0.5555555555555556
Wins: 3184, Losses: 5511, Draws: 1506
Episode: 10300, Win Rate: 0.31, Epsilon: 0.703, smartmove0.5555555555555556
Wins: 3198, Losses: 5588, Draws: 1515
Episode: 10400, Win Rate: 0.31, Epsilon: 0.604, smartmove0.5555555555555556
Wins: 3214, Losses: 5659, Draws: 1528
Episode: 10500, Win Rate: 0.31, Epsilon: 0.505, smartmove0.5555555555555556
Wins: 3228, Losses: 5733, Draws: 1540
Episode: 10600, Win Rate: 0.31, Epsilon: 0.406, smartmove0.5555555555555556
Wins: 3257, Losses: 5787, Draws: 1557
Episode: 10700, Win Rate: 0.31, Epsilon: 0.307, smartmove0.5555555555555556
Wins: 3278, Losses: 5848, Draws: 1575
Episode: 10800, Win Rate: 0.31, Epsilon: 0.208, smartmove0.5555



Episode: 11000, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3337, Losses: 6020, Draws: 1644
Episode: 11100, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3367, Losses: 6066, Draws: 1668
Episode: 11200, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3394, Losses: 6121, Draws: 1686
Episode: 11300, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3424, Losses: 6160, Draws: 1717
Episode: 11400, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3454, Losses: 6198, Draws: 1749
Episode: 11500, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3484, Losses: 6243, Draws: 1774
Episode: 11600, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3513, Losses: 6291, Draws: 1797
Episode: 11700, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3535, Losses: 6342, Draws: 1824
Episode: 11800, Win Rate: 0.30, Epsilon: 0.010, smartmove0.5555555555555556
Wins: 3560, 



Resetting epsilon to 1.0
Episode: 12000, Win Rate: 0.30, Epsilon: 1.000, smartmove0.6666666666666666
Wins: 3627, Losses: 6471, Draws: 1903
Episode: 12100, Win Rate: 0.30, Epsilon: 0.901, smartmove0.6666666666666666
Wins: 3635, Losses: 6550, Draws: 1916
Episode: 12200, Win Rate: 0.30, Epsilon: 0.802, smartmove0.6666666666666666
Wins: 3647, Losses: 6628, Draws: 1926
Episode: 12300, Win Rate: 0.30, Epsilon: 0.703, smartmove0.6666666666666666
Wins: 3665, Losses: 6694, Draws: 1942
Episode: 12400, Win Rate: 0.30, Epsilon: 0.604, smartmove0.6666666666666666
Wins: 3676, Losses: 6765, Draws: 1960
Episode: 12500, Win Rate: 0.30, Epsilon: 0.505, smartmove0.6666666666666666
Wins: 3689, Losses: 6830, Draws: 1982
Episode: 12600, Win Rate: 0.29, Epsilon: 0.406, smartmove0.6666666666666666
Wins: 3699, Losses: 6903, Draws: 1999
Episode: 12700, Win Rate: 0.29, Epsilon: 0.307, smartmove0.6666666666666666
Wins: 3717, Losses: 6962, Draws: 2022
Episode: 12800, Win Rate: 0.29, Epsilon: 0.208, smartmove0.6666



Episode: 13000, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3775, Losses: 7140, Draws: 2086
Episode: 13100, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3804, Losses: 7188, Draws: 2109
Episode: 13200, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3821, Losses: 7236, Draws: 2144
Episode: 13300, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3834, Losses: 7293, Draws: 2174
Episode: 13400, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3859, Losses: 7350, Draws: 2192
Episode: 13500, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3874, Losses: 7411, Draws: 2216
Episode: 13600, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3903, Losses: 7455, Draws: 2243
Episode: 13700, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3928, Losses: 7499, Draws: 2274
Episode: 13800, Win Rate: 0.29, Epsilon: 0.010, smartmove0.6666666666666666
Wins: 3958, 



Resetting epsilon to 1.0
Episode: 14000, Win Rate: 0.28, Epsilon: 1.000, smartmove0.7777777777777778
Wins: 3985, Losses: 7651, Draws: 2365
Episode: 14100, Win Rate: 0.28, Epsilon: 0.901, smartmove0.7777777777777778
Wins: 3996, Losses: 7733, Draws: 2372
Episode: 14200, Win Rate: 0.28, Epsilon: 0.802, smartmove0.7777777777777778
Wins: 4005, Losses: 7821, Draws: 2375
Episode: 14300, Win Rate: 0.28, Epsilon: 0.703, smartmove0.7777777777777778
Wins: 4012, Losses: 7903, Draws: 2386
Episode: 14400, Win Rate: 0.28, Epsilon: 0.604, smartmove0.7777777777777778
Wins: 4023, Losses: 7983, Draws: 2395
Episode: 14500, Win Rate: 0.28, Epsilon: 0.505, smartmove0.7777777777777778
Wins: 4040, Losses: 8047, Draws: 2414
Episode: 14600, Win Rate: 0.28, Epsilon: 0.406, smartmove0.7777777777777778
Wins: 4056, Losses: 8114, Draws: 2431
Episode: 14700, Win Rate: 0.28, Epsilon: 0.307, smartmove0.7777777777777778
Wins: 4068, Losses: 8181, Draws: 2452
Episode: 14800, Win Rate: 0.28, Epsilon: 0.208, smartmove0.7777



Episode: 15000, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4108, Losses: 8362, Draws: 2531
Episode: 15100, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4117, Losses: 8421, Draws: 2563
Episode: 15200, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4127, Losses: 8475, Draws: 2599
Episode: 15300, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4141, Losses: 8527, Draws: 2633
Episode: 15400, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4158, Losses: 8587, Draws: 2656
Episode: 15500, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4173, Losses: 8648, Draws: 2680
Episode: 15600, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4196, Losses: 8706, Draws: 2699
Episode: 15700, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4216, Losses: 8762, Draws: 2723
Episode: 15800, Win Rate: 0.27, Epsilon: 0.010, smartmove0.7777777777777778
Wins: 4242, 



Resetting epsilon to 1.0
Episode: 16000, Win Rate: 0.27, Epsilon: 1.000, smartmove0.8
Wins: 4287, Losses: 8921, Draws: 2793
Episode: 16100, Win Rate: 0.27, Epsilon: 0.901, smartmove0.8
Wins: 4291, Losses: 9008, Draws: 2802
Episode: 16200, Win Rate: 0.27, Epsilon: 0.802, smartmove0.8
Wins: 4295, Losses: 9094, Draws: 2812
Episode: 16300, Win Rate: 0.26, Epsilon: 0.703, smartmove0.8
Wins: 4305, Losses: 9173, Draws: 2823
Episode: 16400, Win Rate: 0.26, Epsilon: 0.604, smartmove0.8
Wins: 4320, Losses: 9248, Draws: 2833
Episode: 16500, Win Rate: 0.26, Epsilon: 0.505, smartmove0.8
Wins: 4327, Losses: 9321, Draws: 2853
Episode: 16600, Win Rate: 0.26, Epsilon: 0.406, smartmove0.8
Wins: 4334, Losses: 9392, Draws: 2875
Episode: 16700, Win Rate: 0.26, Epsilon: 0.307, smartmove0.8
Wins: 4343, Losses: 9453, Draws: 2905
Episode: 16800, Win Rate: 0.26, Epsilon: 0.208, smartmove0.8
Wins: 4355, Losses: 9516, Draws: 2930
Episode: 16900, Win Rate: 0.26, Epsilon: 0.109, smartmove0.8
Wins: 4368, Losses: 957



Episode: 17000, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4384, Losses: 9621, Draws: 2996
Episode: 17100, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4406, Losses: 9663, Draws: 3032
Episode: 17200, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4426, Losses: 9708, Draws: 3067
Episode: 17300, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4442, Losses: 9749, Draws: 3110
Episode: 17400, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4460, Losses: 9803, Draws: 3138
Episode: 17500, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4481, Losses: 9844, Draws: 3176
Episode: 17600, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4505, Losses: 9883, Draws: 3213
Episode: 17700, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4539, Losses: 9916, Draws: 3246
Episode: 17800, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4570, Losses: 9954, Draws: 3277
Episode: 17900, Win Rate: 0.26, Epsilon: 0.010, smartmove0.8
Wins: 4593, Losses: 9993, Draws: 3315




Resetting epsilon to 1.0
Episode: 18000, Win Rate: 0.26, Epsilon: 1.000, smartmove0.8
Wins: 4610, Losses: 10032, Draws: 3359
Episode: 18100, Win Rate: 0.26, Epsilon: 0.901, smartmove0.8
Wins: 4618, Losses: 10114, Draws: 3369
Episode: 18200, Win Rate: 0.25, Epsilon: 0.802, smartmove0.8
Wins: 4624, Losses: 10190, Draws: 3387
Episode: 18300, Win Rate: 0.25, Epsilon: 0.703, smartmove0.8
Wins: 4633, Losses: 10265, Draws: 3403
Episode: 18400, Win Rate: 0.25, Epsilon: 0.604, smartmove0.8
Wins: 4641, Losses: 10344, Draws: 3416
Episode: 18500, Win Rate: 0.25, Epsilon: 0.505, smartmove0.8
Wins: 4654, Losses: 10415, Draws: 3432
Episode: 18600, Win Rate: 0.25, Epsilon: 0.406, smartmove0.8
Wins: 4664, Losses: 10486, Draws: 3451
Episode: 18700, Win Rate: 0.25, Epsilon: 0.307, smartmove0.8
Wins: 4676, Losses: 10549, Draws: 3476
Episode: 18800, Win Rate: 0.25, Epsilon: 0.208, smartmove0.8
Wins: 4688, Losses: 10611, Draws: 3502
Episode: 18900, Win Rate: 0.25, Epsilon: 0.109, smartmove0.8
Wins: 4705, Lo



Episode: 19000, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4724, Losses: 10695, Draws: 3582
Episode: 19100, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4744, Losses: 10739, Draws: 3618
Episode: 19200, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4762, Losses: 10777, Draws: 3662
Episode: 19300, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4785, Losses: 10813, Draws: 3703
Episode: 19400, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4807, Losses: 10844, Draws: 3750
Episode: 19500, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4827, Losses: 10879, Draws: 3795
Episode: 19600, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4844, Losses: 10925, Draws: 3832
Episode: 19700, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4869, Losses: 10961, Draws: 3871
Episode: 19800, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4889, Losses: 10997, Draws: 3915
Episode: 19900, Win Rate: 0.25, Epsilon: 0.010, smartmove0.8
Wins: 4919, Losses: 11026, Draws: 3956




In [None]:
import numpy as np
import random
import os
from collections import deque
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, model_path='model6.h5'):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model_path = model_path

        # Try to load the model if it exists, otherwise create a new one
        if os.path.exists(model_path):
            try:
                print(f'{model_path} exists. Loading the model.')
                self.model = load_model(model_path, compile=False)
                # Recompile the model with explicit loss function
                self.model.compile(
                    loss=MeanSquaredError(),
                    optimizer=Adam(learning_rate=self.learning_rate)
                )
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Creating new model instead.")
                self.model = self._build_model()
        else:
            print(f'{model_path} does not exist. Starting new training.')
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(
            loss=MeanSquaredError(),
            optimizer=Adam(learning_rate=self.learning_rate)
        )
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Use batch prediction for efficiency
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)

def train_agent(episodes=20000, model_path='model6.h5'):
    agent = SQNAgent(model_path=model_path)
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            agent.epsilon = 1.0
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 1000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.6))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1, -0.5 * smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove: {smartness:.2f}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            try:
                agent.model.save(f'model6_retrain_episode_{episode}.h5')
                print(f"Checkpoint saved at episode {episode}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")

    # Save the final model
    try:
        agent.model.save("model6_retrain.h5")
        print(f'Final model saved as model6_retrain.h5')
    except Exception as e:
        print(f"Error saving final model: {e}")


train_agent()



model6.h5 exists. Loading the model.
Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove: 0.00
Wins: 0, Losses: 1, Draws: 0
Checkpoint saved at episode 0
Episode: 100, Win Rate: 0.22, Epsilon: 0.901, smartmove: 0.01
Wins: 22, Losses: 61, Draws: 18
Episode: 200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.02
Wins: 53, Losses: 122, Draws: 26
Episode: 300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.03
Wins: 88, Losses: 178, Draws: 35
Episode: 400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.03
Wins: 129, Losses: 224, Draws: 48
Episode: 500, Win Rate: 0.33, Epsilon: 0.505, smartmove: 0.04
Wins: 164, Losses: 274, Draws: 63
Episode: 600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.05
Wins: 205, Losses: 319, Draws: 77
Episode: 700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.06
Wins: 243, Losses: 360, Draws: 98
Episode: 800, Win Rate: 0.37, Epsilon: 0.208, smartmove: 0.07
Wins: 293, Losses: 394, Draws: 114
Episode: 900, Win Rate: 0.37, Epsilon: 0.109, smartmove: 0.07
Wins: 337, Losses



Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.38, Epsilon: 1.000, smartmove: 0.08
Wins: 378, Losses: 480, Draws: 143
Checkpoint saved at episode 1000
Episode: 1100, Win Rate: 0.36, Epsilon: 0.901, smartmove: 0.09
Wins: 401, Losses: 545, Draws: 155
Episode: 1200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.10
Wins: 424, Losses: 608, Draws: 169
Episode: 1300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.11
Wins: 452, Losses: 669, Draws: 180
Episode: 1400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.12
Wins: 478, Losses: 728, Draws: 195
Episode: 1500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.12
Wins: 505, Losses: 783, Draws: 213
Episode: 1600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.13
Wins: 544, Losses: 835, Draws: 222
Episode: 1700, Win Rate: 0.34, Epsilon: 0.307, smartmove: 0.14
Wins: 586, Losses: 880, Draws: 235
Episode: 1800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.15
Wins: 633, Losses: 915, Draws: 253
Episode: 1900, Win Rate: 0.35, Epsilon: 0.109, smartmove: 0.



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.36, Epsilon: 1.000, smartmove: 0.17
Wins: 712, Losses: 1013, Draws: 276
Checkpoint saved at episode 2000
Episode: 2100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.17
Wins: 733, Losses: 1081, Draws: 287
Episode: 2200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.18
Wins: 759, Losses: 1141, Draws: 301
Episode: 2300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.19
Wins: 780, Losses: 1196, Draws: 325
Episode: 2400, Win Rate: 0.33, Epsilon: 0.604, smartmove: 0.20
Wins: 800, Losses: 1260, Draws: 341
Episode: 2500, Win Rate: 0.33, Epsilon: 0.505, smartmove: 0.21
Wins: 827, Losses: 1310, Draws: 364
Episode: 2600, Win Rate: 0.33, Epsilon: 0.406, smartmove: 0.22
Wins: 860, Losses: 1358, Draws: 383
Episode: 2700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.23
Wins: 898, Losses: 1400, Draws: 403
Episode: 2800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.23
Wins: 926, Losses: 1455, Draws: 420
Episode: 2900, Win Rate: 0.33, Epsilon: 0.109, smar



Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.33, Epsilon: 1.000, smartmove: 0.25
Wins: 994, Losses: 1539, Draws: 468
Checkpoint saved at episode 3000
Episode: 3100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.26
Wins: 1016, Losses: 1609, Draws: 476
Episode: 3200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.27
Wins: 1038, Losses: 1679, Draws: 484
Episode: 3300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.28
Wins: 1055, Losses: 1751, Draws: 495
Episode: 3400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.28
Wins: 1091, Losses: 1802, Draws: 508
Episode: 3500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.29
Wins: 1127, Losses: 1855, Draws: 519
Episode: 3600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.30
Wins: 1155, Losses: 1910, Draws: 536
Episode: 3700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.31
Wins: 1189, Losses: 1957, Draws: 555
Episode: 3800, Win Rate: 0.32, Epsilon: 0.208, smartmove: 0.32
Wins: 1218, Losses: 2012, Draws: 571
Episode: 3900, Win Rate: 0.32, Epsilon: 0.1



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.32, Epsilon: 1.000, smartmove: 0.33
Wins: 1276, Losses: 2125, Draws: 600
Checkpoint saved at episode 4000
Episode: 4100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.34
Wins: 1291, Losses: 2202, Draws: 608
Episode: 4200, Win Rate: 0.31, Epsilon: 0.802, smartmove: 0.35
Wins: 1315, Losses: 2265, Draws: 621
Episode: 4300, Win Rate: 0.31, Epsilon: 0.703, smartmove: 0.36
Wins: 1335, Losses: 2330, Draws: 636
Episode: 4400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.37
Wins: 1351, Losses: 2397, Draws: 653
Episode: 4500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.38
Wins: 1367, Losses: 2463, Draws: 671
Episode: 4600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.38
Wins: 1386, Losses: 2528, Draws: 687
Episode: 4700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.39
Wins: 1419, Losses: 2584, Draws: 698
Episode: 4800, Win Rate: 0.30, Epsilon: 0.208, smartmove: 0.40
Wins: 1439, Losses: 2641, Draws: 721
Episode: 4900, Win Rate: 0.30, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.30, Epsilon: 1.000, smartmove: 0.42
Wins: 1500, Losses: 2739, Draws: 762
Checkpoint saved at episode 5000
Episode: 5100, Win Rate: 0.30, Epsilon: 0.901, smartmove: 0.42
Wins: 1510, Losses: 2816, Draws: 775
Episode: 5200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.43
Wins: 1526, Losses: 2891, Draws: 784
Episode: 5300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.44
Wins: 1546, Losses: 2956, Draws: 799
Episode: 5400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.45
Wins: 1564, Losses: 3021, Draws: 816
Episode: 5500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.46
Wins: 1589, Losses: 3088, Draws: 824
Episode: 5600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.47
Wins: 1617, Losses: 3145, Draws: 839
Episode: 5700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.47
Wins: 1645, Losses: 3200, Draws: 856
Episode: 5800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.48
Wins: 1660, Losses: 3252, Draws: 889
Episode: 5900, Win Rate: 0.29, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.29, Epsilon: 1.000, smartmove: 0.50
Wins: 1717, Losses: 3346, Draws: 938
Checkpoint saved at episode 6000
Episode: 6100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.51
Wins: 1733, Losses: 3418, Draws: 950
Episode: 6200, Win Rate: 0.28, Epsilon: 0.802, smartmove: 0.52
Wins: 1753, Losses: 3482, Draws: 966
Episode: 6300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.53
Wins: 1771, Losses: 3552, Draws: 978
Episode: 6400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.53
Wins: 1784, Losses: 3621, Draws: 996
Episode: 6500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.54
Wins: 1799, Losses: 3690, Draws: 1012
Episode: 6600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.55
Wins: 1814, Losses: 3758, Draws: 1029
Episode: 6700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.56
Wins: 1832, Losses: 3821, Draws: 1048
Episode: 6800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.57
Wins: 1853, Losses: 3883, Draws: 1065
Episode: 6900, Win Rate: 0.27, Epsilon



Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.27, Epsilon: 1.000, smartmove: 0.58
Wins: 1891, Losses: 3984, Draws: 1126
Checkpoint saved at episode 7000
Episode: 7100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.59
Wins: 1900, Losses: 4064, Draws: 1137
Episode: 7200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.60
Wins: 1910, Losses: 4135, Draws: 1156
Episode: 7300, Win Rate: 0.26, Epsilon: 0.703, smartmove: 0.61
Wins: 1926, Losses: 4199, Draws: 1176
Episode: 7400, Win Rate: 0.26, Epsilon: 0.604, smartmove: 0.62
Wins: 1937, Losses: 4271, Draws: 1193
Episode: 7500, Win Rate: 0.26, Epsilon: 0.505, smartmove: 0.62
Wins: 1946, Losses: 4350, Draws: 1205
Episode: 7600, Win Rate: 0.26, Epsilon: 0.406, smartmove: 0.63
Wins: 1962, Losses: 4420, Draws: 1219
Episode: 7700, Win Rate: 0.26, Epsilon: 0.307, smartmove: 0.64
Wins: 1984, Losses: 4472, Draws: 1245
Episode: 7800, Win Rate: 0.26, Epsilon: 0.208, smartmove: 0.65
Wins: 2004, Losses: 4520, Draws: 1277
Episode: 7900, Win Rate: 0.26, Ep



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.25, Epsilon: 1.000, smartmove: 0.67
Wins: 2033, Losses: 4624, Draws: 1344
Checkpoint saved at episode 8000
Episode: 8100, Win Rate: 0.25, Epsilon: 0.901, smartmove: 0.68
Wins: 2037, Losses: 4708, Draws: 1356
Episode: 8200, Win Rate: 0.25, Epsilon: 0.802, smartmove: 0.68
Wins: 2044, Losses: 4787, Draws: 1370
Episode: 8300, Win Rate: 0.25, Epsilon: 0.703, smartmove: 0.69
Wins: 2052, Losses: 4864, Draws: 1385
Episode: 8400, Win Rate: 0.25, Epsilon: 0.604, smartmove: 0.70
Wins: 2063, Losses: 4933, Draws: 1405
Episode: 8500, Win Rate: 0.24, Epsilon: 0.505, smartmove: 0.71
Wins: 2078, Losses: 4999, Draws: 1424
Episode: 8600, Win Rate: 0.24, Epsilon: 0.406, smartmove: 0.72
Wins: 2095, Losses: 5064, Draws: 1442
Episode: 8700, Win Rate: 0.24, Epsilon: 0.307, smartmove: 0.72
Wins: 2111, Losses: 5123, Draws: 1467
Episode: 8800, Win Rate: 0.24, Epsilon: 0.208, smartmove: 0.73
Wins: 2123, Losses: 5186, Draws: 1492
Episode: 8900, Win Rate: 0.24, Ep



Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.24, Epsilon: 1.000, smartmove: 0.75
Wins: 2158, Losses: 5290, Draws: 1553
Checkpoint saved at episode 9000
Episode: 9100, Win Rate: 0.24, Epsilon: 0.901, smartmove: 0.76
Wins: 2165, Losses: 5368, Draws: 1568
Episode: 9200, Win Rate: 0.24, Epsilon: 0.802, smartmove: 0.77
Wins: 2169, Losses: 5456, Draws: 1576
Episode: 9300, Win Rate: 0.23, Epsilon: 0.703, smartmove: 0.78
Wins: 2172, Losses: 5541, Draws: 1588
Episode: 9400, Win Rate: 0.23, Epsilon: 0.604, smartmove: 0.78
Wins: 2180, Losses: 5619, Draws: 1602
Episode: 9500, Win Rate: 0.23, Epsilon: 0.505, smartmove: 0.79
Wins: 2185, Losses: 5695, Draws: 1621
Episode: 9600, Win Rate: 0.23, Epsilon: 0.406, smartmove: 0.80
Wins: 2192, Losses: 5771, Draws: 1638
Episode: 9700, Win Rate: 0.23, Epsilon: 0.307, smartmove: 0.81
Wins: 2202, Losses: 5839, Draws: 1660
Episode: 9800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 0.82
Wins: 2212, Losses: 5890, Draws: 1699
Episode: 9900, Win Rate: 0.22, Ep



Resetting epsilon to 1.0
Episode: 10000, Win Rate: 0.22, Epsilon: 1.000, smartmove: 0.83
Wins: 2235, Losses: 5990, Draws: 1776
Checkpoint saved at episode 10000
Episode: 10100, Win Rate: 0.22, Epsilon: 0.901, smartmove: 0.84
Wins: 2239, Losses: 6077, Draws: 1785
Episode: 10200, Win Rate: 0.22, Epsilon: 0.802, smartmove: 0.85
Wins: 2244, Losses: 6156, Draws: 1801
Episode: 10300, Win Rate: 0.22, Epsilon: 0.703, smartmove: 0.86
Wins: 2246, Losses: 6242, Draws: 1813
Episode: 10400, Win Rate: 0.22, Epsilon: 0.604, smartmove: 0.87
Wins: 2258, Losses: 6312, Draws: 1831
Episode: 10500, Win Rate: 0.22, Epsilon: 0.505, smartmove: 0.88
Wins: 2266, Losses: 6385, Draws: 1850
Episode: 10600, Win Rate: 0.21, Epsilon: 0.406, smartmove: 0.88
Wins: 2271, Losses: 6456, Draws: 1874
Episode: 10700, Win Rate: 0.21, Epsilon: 0.307, smartmove: 0.89
Wins: 2281, Losses: 6515, Draws: 1905
Episode: 10800, Win Rate: 0.21, Epsilon: 0.208, smartmove: 0.90
Wins: 2293, Losses: 6577, Draws: 1931
Episode: 10900, Win Rat



Resetting epsilon to 1.0
Episode: 11000, Win Rate: 0.21, Epsilon: 1.000, smartmove: 0.92
Wins: 2317, Losses: 6673, Draws: 2011
Checkpoint saved at episode 11000
Episode: 11100, Win Rate: 0.21, Epsilon: 0.901, smartmove: 0.93
Wins: 2319, Losses: 6756, Draws: 2026
Episode: 11200, Win Rate: 0.21, Epsilon: 0.802, smartmove: 0.93
Wins: 2323, Losses: 6843, Draws: 2035
Episode: 11300, Win Rate: 0.21, Epsilon: 0.703, smartmove: 0.94
Wins: 2326, Losses: 6921, Draws: 2054
Episode: 11400, Win Rate: 0.20, Epsilon: 0.604, smartmove: 0.95
Wins: 2333, Losses: 6997, Draws: 2071
Episode: 11500, Win Rate: 0.20, Epsilon: 0.505, smartmove: 0.96
Wins: 2339, Losses: 7065, Draws: 2097
Episode: 11600, Win Rate: 0.20, Epsilon: 0.406, smartmove: 0.97
Wins: 2343, Losses: 7148, Draws: 2110
Episode: 11700, Win Rate: 0.20, Epsilon: 0.307, smartmove: 0.97
Wins: 2348, Losses: 7212, Draws: 2141
Episode: 11800, Win Rate: 0.20, Epsilon: 0.208, smartmove: 0.98
Wins: 2356, Losses: 7271, Draws: 2174
Episode: 11900, Win Rat



Resetting epsilon to 1.0
Episode: 12000, Win Rate: 0.20, Epsilon: 1.000, smartmove: 1.00
Wins: 2365, Losses: 7356, Draws: 2280
Checkpoint saved at episode 12000
Episode: 12100, Win Rate: 0.20, Epsilon: 0.901, smartmove: 1.00
Wins: 2367, Losses: 7449, Draws: 2285
Episode: 12200, Win Rate: 0.19, Epsilon: 0.802, smartmove: 1.00
Wins: 2371, Losses: 7537, Draws: 2293
Episode: 12300, Win Rate: 0.19, Epsilon: 0.703, smartmove: 1.00
Wins: 2374, Losses: 7614, Draws: 2313
Episode: 12400, Win Rate: 0.19, Epsilon: 0.604, smartmove: 1.00
Wins: 2375, Losses: 7693, Draws: 2333
Episode: 12500, Win Rate: 0.19, Epsilon: 0.505, smartmove: 1.00
Wins: 2377, Losses: 7769, Draws: 2355
Episode: 12600, Win Rate: 0.19, Epsilon: 0.406, smartmove: 1.00
Wins: 2380, Losses: 7836, Draws: 2385
Episode: 12700, Win Rate: 0.19, Epsilon: 0.307, smartmove: 1.00
Wins: 2382, Losses: 7902, Draws: 2417
Episode: 12800, Win Rate: 0.19, Epsilon: 0.208, smartmove: 1.00
Wins: 2385, Losses: 7953, Draws: 2463
Episode: 12900, Win Rat



Resetting epsilon to 1.0
Episode: 13000, Win Rate: 0.18, Epsilon: 1.000, smartmove: 1.00
Wins: 2392, Losses: 8024, Draws: 2585
Checkpoint saved at episode 13000
Episode: 13100, Win Rate: 0.18, Epsilon: 0.901, smartmove: 1.00
Wins: 2394, Losses: 8117, Draws: 2590
Episode: 13200, Win Rate: 0.18, Epsilon: 0.802, smartmove: 1.00
Wins: 2395, Losses: 8205, Draws: 2601
Episode: 13300, Win Rate: 0.18, Epsilon: 0.703, smartmove: 1.00
Wins: 2397, Losses: 8293, Draws: 2611
Episode: 13400, Win Rate: 0.18, Epsilon: 0.604, smartmove: 1.00
Wins: 2401, Losses: 8372, Draws: 2628
Episode: 13500, Win Rate: 0.18, Epsilon: 0.505, smartmove: 1.00
Wins: 2402, Losses: 8449, Draws: 2650
Episode: 13600, Win Rate: 0.18, Epsilon: 0.406, smartmove: 1.00
Wins: 2404, Losses: 8516, Draws: 2681
Episode: 13700, Win Rate: 0.18, Epsilon: 0.307, smartmove: 1.00
Wins: 2404, Losses: 8579, Draws: 2718
Episode: 13800, Win Rate: 0.17, Epsilon: 0.208, smartmove: 1.00
Wins: 2407, Losses: 8626, Draws: 2768
Episode: 13900, Win Rat



Resetting epsilon to 1.0
Episode: 14000, Win Rate: 0.17, Epsilon: 1.000, smartmove: 1.00
Wins: 2415, Losses: 8717, Draws: 2869
Checkpoint saved at episode 14000
Episode: 14100, Win Rate: 0.17, Epsilon: 0.901, smartmove: 1.00
Wins: 2416, Losses: 8810, Draws: 2875
Episode: 14200, Win Rate: 0.17, Epsilon: 0.802, smartmove: 1.00
Wins: 2416, Losses: 8892, Draws: 2893
Episode: 14300, Win Rate: 0.17, Epsilon: 0.703, smartmove: 1.00
Wins: 2417, Losses: 8974, Draws: 2910
Episode: 14400, Win Rate: 0.17, Epsilon: 0.604, smartmove: 1.00
Wins: 2418, Losses: 9052, Draws: 2931
Episode: 14500, Win Rate: 0.17, Epsilon: 0.505, smartmove: 1.00
Wins: 2418, Losses: 9126, Draws: 2957
Episode: 14600, Win Rate: 0.17, Epsilon: 0.406, smartmove: 1.00
Wins: 2420, Losses: 9200, Draws: 2981
Episode: 14700, Win Rate: 0.16, Epsilon: 0.307, smartmove: 1.00
Wins: 2425, Losses: 9256, Draws: 3020
Episode: 14800, Win Rate: 0.16, Epsilon: 0.208, smartmove: 1.00
Wins: 2428, Losses: 9309, Draws: 3064
Episode: 14900, Win Rat



Resetting epsilon to 1.0
Episode: 15000, Win Rate: 0.16, Epsilon: 1.000, smartmove: 1.00
Wins: 2438, Losses: 9387, Draws: 3176
Checkpoint saved at episode 15000
Episode: 15100, Win Rate: 0.16, Epsilon: 0.901, smartmove: 1.00
Wins: 2439, Losses: 9473, Draws: 3189
Episode: 15200, Win Rate: 0.16, Epsilon: 0.802, smartmove: 1.00
Wins: 2440, Losses: 9559, Draws: 3202
Episode: 15300, Win Rate: 0.16, Epsilon: 0.703, smartmove: 1.00
Wins: 2440, Losses: 9644, Draws: 3217
Episode: 15400, Win Rate: 0.16, Epsilon: 0.604, smartmove: 1.00
Wins: 2440, Losses: 9721, Draws: 3240
Episode: 15500, Win Rate: 0.16, Epsilon: 0.505, smartmove: 1.00
Wins: 2442, Losses: 9796, Draws: 3263
Episode: 15600, Win Rate: 0.16, Epsilon: 0.406, smartmove: 1.00
Wins: 2446, Losses: 9859, Draws: 3296
Episode: 15700, Win Rate: 0.16, Epsilon: 0.307, smartmove: 1.00
Wins: 2446, Losses: 9911, Draws: 3344
Episode: 15800, Win Rate: 0.16, Epsilon: 0.208, smartmove: 1.00
Wins: 2450, Losses: 9965, Draws: 3386
Episode: 15900, Win Rat



Resetting epsilon to 1.0
Episode: 16000, Win Rate: 0.15, Epsilon: 1.000, smartmove: 1.00
Wins: 2459, Losses: 10035, Draws: 3507
Checkpoint saved at episode 16000
Episode: 16100, Win Rate: 0.15, Epsilon: 0.901, smartmove: 1.00
Wins: 2460, Losses: 10123, Draws: 3518
Episode: 16200, Win Rate: 0.15, Epsilon: 0.802, smartmove: 1.00
Wins: 2462, Losses: 10203, Draws: 3536
Episode: 16300, Win Rate: 0.15, Epsilon: 0.703, smartmove: 1.00
Wins: 2464, Losses: 10289, Draws: 3548
Episode: 16400, Win Rate: 0.15, Epsilon: 0.604, smartmove: 1.00
Wins: 2468, Losses: 10371, Draws: 3562
Episode: 16500, Win Rate: 0.15, Epsilon: 0.505, smartmove: 1.00
Wins: 2468, Losses: 10440, Draws: 3593
Episode: 16600, Win Rate: 0.15, Epsilon: 0.406, smartmove: 1.00
Wins: 2477, Losses: 10502, Draws: 3622
Episode: 16700, Win Rate: 0.15, Epsilon: 0.307, smartmove: 1.00
Wins: 2480, Losses: 10564, Draws: 3657
Episode: 16800, Win Rate: 0.15, Epsilon: 0.208, smartmove: 1.00
Wins: 2485, Losses: 10611, Draws: 3705
Episode: 16900



Resetting epsilon to 1.0
Episode: 17000, Win Rate: 0.15, Epsilon: 1.000, smartmove: 1.00
Wins: 2495, Losses: 10687, Draws: 3819
Checkpoint saved at episode 17000
Episode: 17100, Win Rate: 0.15, Epsilon: 0.901, smartmove: 1.00
Wins: 2495, Losses: 10779, Draws: 3827
Episode: 17200, Win Rate: 0.15, Epsilon: 0.802, smartmove: 1.00
Wins: 2496, Losses: 10862, Draws: 3843
Episode: 17300, Win Rate: 0.14, Epsilon: 0.703, smartmove: 1.00
Wins: 2497, Losses: 10949, Draws: 3855
Episode: 17400, Win Rate: 0.14, Epsilon: 0.604, smartmove: 1.00
Wins: 2498, Losses: 11022, Draws: 3881
Episode: 17500, Win Rate: 0.14, Epsilon: 0.505, smartmove: 1.00
Wins: 2500, Losses: 11099, Draws: 3902
Episode: 17600, Win Rate: 0.14, Epsilon: 0.406, smartmove: 1.00
Wins: 2504, Losses: 11162, Draws: 3935
Episode: 17700, Win Rate: 0.14, Epsilon: 0.307, smartmove: 1.00
Wins: 2505, Losses: 11224, Draws: 3972
Episode: 17800, Win Rate: 0.14, Epsilon: 0.208, smartmove: 1.00
Wins: 2508, Losses: 11286, Draws: 4007
Episode: 17900



Resetting epsilon to 1.0
Episode: 18000, Win Rate: 0.14, Epsilon: 1.000, smartmove: 1.00
Wins: 2518, Losses: 11375, Draws: 4108
Checkpoint saved at episode 18000
Episode: 18100, Win Rate: 0.14, Epsilon: 0.901, smartmove: 1.00
Wins: 2519, Losses: 11463, Draws: 4119
Episode: 18200, Win Rate: 0.14, Epsilon: 0.802, smartmove: 1.00
Wins: 2522, Losses: 11550, Draws: 4129
Episode: 18300, Win Rate: 0.14, Epsilon: 0.703, smartmove: 1.00
Wins: 2525, Losses: 11623, Draws: 4153
Episode: 18400, Win Rate: 0.14, Epsilon: 0.604, smartmove: 1.00
Wins: 2527, Losses: 11700, Draws: 4174
Episode: 18500, Win Rate: 0.14, Epsilon: 0.505, smartmove: 1.00
Wins: 2528, Losses: 11772, Draws: 4201
Episode: 18600, Win Rate: 0.14, Epsilon: 0.406, smartmove: 1.00
Wins: 2534, Losses: 11845, Draws: 4222
Episode: 18700, Win Rate: 0.14, Epsilon: 0.307, smartmove: 1.00
Wins: 2537, Losses: 11899, Draws: 4265
Episode: 18800, Win Rate: 0.14, Epsilon: 0.208, smartmove: 1.00
Wins: 2544, Losses: 11948, Draws: 4309
Episode: 18900



Resetting epsilon to 1.0
Episode: 19000, Win Rate: 0.13, Epsilon: 1.000, smartmove: 1.00
Wins: 2552, Losses: 12037, Draws: 4412
Checkpoint saved at episode 19000
Episode: 19100, Win Rate: 0.13, Epsilon: 0.901, smartmove: 1.00
Wins: 2553, Losses: 12128, Draws: 4420
Episode: 19200, Win Rate: 0.13, Epsilon: 0.802, smartmove: 1.00
Wins: 2554, Losses: 12214, Draws: 4433
Episode: 19300, Win Rate: 0.13, Epsilon: 0.703, smartmove: 1.00
Wins: 2554, Losses: 12302, Draws: 4445
Episode: 19400, Win Rate: 0.13, Epsilon: 0.604, smartmove: 1.00
Wins: 2558, Losses: 12379, Draws: 4464
Episode: 19500, Win Rate: 0.13, Epsilon: 0.505, smartmove: 1.00
Wins: 2559, Losses: 12456, Draws: 4486
Episode: 19600, Win Rate: 0.13, Epsilon: 0.406, smartmove: 1.00
Wins: 2563, Losses: 12526, Draws: 4512
Episode: 19700, Win Rate: 0.13, Epsilon: 0.307, smartmove: 1.00
Wins: 2567, Losses: 12583, Draws: 4551
Episode: 19800, Win Rate: 0.13, Epsilon: 0.208, smartmove: 1.00
Wins: 2575, Losses: 12628, Draws: 4598
Episode: 19900



Final model saved as model6_retrain.h5


In [33]:
import numpy as np
import random
import os
from collections import deque
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, model_path='model6.h5'):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model_path = model_path

        # Try to load the model if it exists, otherwise create a new one
        if os.path.exists(model_path):
            try:
                print(f'{model_path} exists. Loading the model.')
                self.model = load_model(model_path, compile=False)
                # Recompile the model with explicit loss function
                self.model.compile(
                    loss=MeanSquaredError(),
                    optimizer=Adam(learning_rate=self.learning_rate)
                )
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Creating new model instead.")
                self.model = self._build_model()
        else:
            print(f'{model_path} does not exist. Starting new training.')
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(
            loss=MeanSquaredError(),
            optimizer=Adam(learning_rate=self.learning_rate)
        )
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Use batch prediction for efficiency
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)

def train_agent(episodes=40000, model_path='model6_retrain_retrain_retrain.h5'):
    agent = SQNAgent(model_path=model_path)
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness=0
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 1000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.9))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1, -0.5 * smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove: {smartness:.2f}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            try:
                agent.model.save(f'model6_retrain_retrain_episode_{episode}.h5')
                print(f"Checkpoint saved at episode {episode}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")

    # Save the final model
    try:
        agent.model.save("model6_retrain_retrain_retrain_retrain.h5")
        print(f'Final model saved as model6_retrain_retrain.h5')
    except Exception as e:
        print(f"Error saving final model: {e}")


train_agent()



model6_retrain_retrain_retrain.h5 exists. Loading the model.
Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove: 0.00
Wins: 0, Losses: 1, Draws: 0
Checkpoint saved at episode 0
Episode: 100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.00
Wins: 33, Losses: 53, Draws: 15
Episode: 200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.01
Wins: 64, Losses: 113, Draws: 24
Episode: 300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.01
Wins: 103, Losses: 161, Draws: 37
Episode: 400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.01
Wins: 147, Losses: 197, Draws: 57
Episode: 500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.01
Wins: 189, Losses: 233, Draws: 79
Episode: 600, Win Rate: 0.40, Epsilon: 0.406, smartmove: 0.02
Wins: 238, Losses: 265, Draws: 98
Episode: 700, Win Rate: 0.41, Epsilon: 0.307, smartmove: 0.02
Wins: 284, Losses: 306, Draws: 111
Episode: 800, Win Rate: 0.43, Epsilon: 0.208, smartmove: 0.02
Wins: 343, Losses: 331, Draws: 127
Episode: 900, Win Rate: 0.45, Epsilon: 0.109, smartmo



Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.47, Epsilon: 0.011, smartmove: 0.03
Wins: 471, Losses: 370, Draws: 160
Checkpoint saved at episode 1000
Episode: 1100, Win Rate: 0.45, Epsilon: 0.901, smartmove: 0.03
Wins: 495, Losses: 434, Draws: 172
Episode: 1200, Win Rate: 0.44, Epsilon: 0.802, smartmove: 0.03
Wins: 530, Losses: 482, Draws: 189
Episode: 1300, Win Rate: 0.43, Epsilon: 0.703, smartmove: 0.04
Wins: 555, Losses: 540, Draws: 206
Episode: 1400, Win Rate: 0.42, Epsilon: 0.604, smartmove: 0.04
Wins: 595, Losses: 583, Draws: 223
Episode: 1500, Win Rate: 0.42, Epsilon: 0.505, smartmove: 0.04
Wins: 624, Losses: 623, Draws: 254
Episode: 1600, Win Rate: 0.42, Epsilon: 0.406, smartmove: 0.04
Wins: 667, Losses: 671, Draws: 263
Episode: 1700, Win Rate: 0.41, Epsilon: 0.307, smartmove: 0.05
Wins: 703, Losses: 709, Draws: 289
Episode: 1800, Win Rate: 0.41, Epsilon: 0.208, smartmove: 0.05
Wins: 747, Losses: 737, Draws: 317
Episode: 1900, Win Rate: 0.42, Epsilon: 0.109, smartmove: 0.



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.43, Epsilon: 0.011, smartmove: 0.06
Wins: 855, Losses: 782, Draws: 364
Checkpoint saved at episode 2000
Episode: 2100, Win Rate: 0.42, Epsilon: 0.901, smartmove: 0.06
Wins: 890, Losses: 839, Draws: 372
Episode: 2200, Win Rate: 0.42, Epsilon: 0.802, smartmove: 0.06
Wins: 921, Losses: 897, Draws: 383
Episode: 2300, Win Rate: 0.41, Epsilon: 0.703, smartmove: 0.06
Wins: 950, Losses: 952, Draws: 399
Episode: 2400, Win Rate: 0.41, Epsilon: 0.604, smartmove: 0.07
Wins: 984, Losses: 1005, Draws: 412
Episode: 2500, Win Rate: 0.41, Epsilon: 0.505, smartmove: 0.07
Wins: 1023, Losses: 1049, Draws: 429
Episode: 2600, Win Rate: 0.41, Epsilon: 0.406, smartmove: 0.07
Wins: 1060, Losses: 1093, Draws: 448
Episode: 2700, Win Rate: 0.41, Epsilon: 0.307, smartmove: 0.07
Wins: 1109, Losses: 1129, Draws: 463
Episode: 2800, Win Rate: 0.41, Epsilon: 0.208, smartmove: 0.08
Wins: 1160, Losses: 1153, Draws: 488
Episode: 2900, Win Rate: 0.42, Epsilon: 0.109, smar



Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.42, Epsilon: 0.011, smartmove: 0.08
Wins: 1250, Losses: 1208, Draws: 543
Checkpoint saved at episode 3000
Episode: 3100, Win Rate: 0.41, Epsilon: 0.901, smartmove: 0.09
Wins: 1276, Losses: 1265, Draws: 560
Episode: 3200, Win Rate: 0.41, Epsilon: 0.802, smartmove: 0.09
Wins: 1302, Losses: 1325, Draws: 574
Episode: 3300, Win Rate: 0.40, Epsilon: 0.703, smartmove: 0.09
Wins: 1334, Losses: 1379, Draws: 588
Episode: 3400, Win Rate: 0.40, Epsilon: 0.604, smartmove: 0.09
Wins: 1362, Losses: 1435, Draws: 604
Episode: 3500, Win Rate: 0.40, Epsilon: 0.505, smartmove: 0.10
Wins: 1397, Losses: 1484, Draws: 620
Episode: 3600, Win Rate: 0.40, Epsilon: 0.406, smartmove: 0.10
Wins: 1437, Losses: 1516, Draws: 648
Episode: 3700, Win Rate: 0.40, Epsilon: 0.307, smartmove: 0.10
Wins: 1489, Losses: 1552, Draws: 660
Episode: 3800, Win Rate: 0.40, Epsilon: 0.208, smartmove: 0.11
Wins: 1535, Losses: 1590, Draws: 676
Episode: 3900, Win Rate: 0.41, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.40, Epsilon: 0.011, smartmove: 0.11
Wins: 1619, Losses: 1643, Draws: 739
Checkpoint saved at episode 4000
Episode: 4100, Win Rate: 0.40, Epsilon: 0.901, smartmove: 0.11
Wins: 1646, Losses: 1704, Draws: 751
Episode: 4200, Win Rate: 0.40, Epsilon: 0.802, smartmove: 0.12
Wins: 1673, Losses: 1761, Draws: 767
Episode: 4300, Win Rate: 0.40, Epsilon: 0.703, smartmove: 0.12
Wins: 1702, Losses: 1818, Draws: 781
Episode: 4400, Win Rate: 0.39, Epsilon: 0.604, smartmove: 0.12
Wins: 1734, Losses: 1862, Draws: 805
Episode: 4500, Win Rate: 0.39, Epsilon: 0.505, smartmove: 0.12
Wins: 1766, Losses: 1910, Draws: 825
Episode: 4600, Win Rate: 0.39, Epsilon: 0.406, smartmove: 0.13
Wins: 1803, Losses: 1955, Draws: 843
Episode: 4700, Win Rate: 0.39, Epsilon: 0.307, smartmove: 0.13
Wins: 1841, Losses: 2000, Draws: 860
Episode: 4800, Win Rate: 0.39, Epsilon: 0.208, smartmove: 0.13
Wins: 1870, Losses: 2045, Draws: 886
Episode: 4900, Win Rate: 0.39, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.39, Epsilon: 0.011, smartmove: 0.14
Wins: 1962, Losses: 2106, Draws: 933
Checkpoint saved at episode 5000
Episode: 5100, Win Rate: 0.39, Epsilon: 0.901, smartmove: 0.14
Wins: 1997, Losses: 2158, Draws: 946
Episode: 5200, Win Rate: 0.39, Epsilon: 0.802, smartmove: 0.14
Wins: 2020, Losses: 2224, Draws: 957
Episode: 5300, Win Rate: 0.39, Epsilon: 0.703, smartmove: 0.15
Wins: 2056, Losses: 2280, Draws: 965
Episode: 5400, Win Rate: 0.39, Epsilon: 0.604, smartmove: 0.15
Wins: 2083, Losses: 2335, Draws: 983
Episode: 5500, Win Rate: 0.39, Epsilon: 0.505, smartmove: 0.15
Wins: 2121, Losses: 2384, Draws: 996
Episode: 5600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.16
Wins: 2149, Losses: 2442, Draws: 1010
Episode: 5700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.16
Wins: 2180, Losses: 2492, Draws: 1029
Episode: 5800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.16
Wins: 2213, Losses: 2533, Draws: 1055
Episode: 5900, Win Rate: 0.38, Epsilon:



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.38, Epsilon: 0.011, smartmove: 0.17
Wins: 2288, Losses: 2598, Draws: 1115
Checkpoint saved at episode 6000
Episode: 6100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.17
Wins: 2311, Losses: 2664, Draws: 1126
Episode: 6200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.17
Wins: 2346, Losses: 2716, Draws: 1139
Episode: 6300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.17
Wins: 2369, Losses: 2778, Draws: 1154
Episode: 6400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.18
Wins: 2396, Losses: 2832, Draws: 1173
Episode: 6500, Win Rate: 0.37, Epsilon: 0.505, smartmove: 0.18
Wins: 2425, Losses: 2883, Draws: 1193
Episode: 6600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.18
Wins: 2459, Losses: 2924, Draws: 1218
Episode: 6700, Win Rate: 0.37, Epsilon: 0.307, smartmove: 0.19
Wins: 2494, Losses: 2971, Draws: 1236
Episode: 6800, Win Rate: 0.37, Epsilon: 0.208, smartmove: 0.19
Wins: 2529, Losses: 3014, Draws: 1258
Episode: 6900, Win Rate: 0.37, Ep



Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.37, Epsilon: 0.011, smartmove: 0.19
Wins: 2610, Losses: 3074, Draws: 1317
Checkpoint saved at episode 7000
Episode: 7100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.20
Wins: 2625, Losses: 3149, Draws: 1327
Episode: 7200, Win Rate: 0.37, Epsilon: 0.802, smartmove: 0.20
Wins: 2643, Losses: 3217, Draws: 1341
Episode: 7300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.20
Wins: 2666, Losses: 3275, Draws: 1360
Episode: 7400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.21
Wins: 2702, Losses: 3333, Draws: 1366
Episode: 7500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.21
Wins: 2737, Losses: 3385, Draws: 1379
Episode: 7600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.21
Wins: 2776, Losses: 3433, Draws: 1392
Episode: 7700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.21
Wins: 2808, Losses: 3476, Draws: 1417
Episode: 7800, Win Rate: 0.37, Epsilon: 0.208, smartmove: 0.22
Wins: 2854, Losses: 3508, Draws: 1439
Episode: 7900, Win Rate: 0.37, Ep



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.37, Epsilon: 0.011, smartmove: 0.22
Wins: 2947, Losses: 3560, Draws: 1494
Checkpoint saved at episode 8000
Episode: 8100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.23
Wins: 2974, Losses: 3621, Draws: 1506
Episode: 8200, Win Rate: 0.37, Epsilon: 0.802, smartmove: 0.23
Wins: 3001, Losses: 3684, Draws: 1516
Episode: 8300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.23
Wins: 3034, Losses: 3741, Draws: 1526
Episode: 8400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.23
Wins: 3059, Losses: 3803, Draws: 1539
Episode: 8500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.24
Wins: 3092, Losses: 3851, Draws: 1558
Episode: 8600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.24
Wins: 3126, Losses: 3893, Draws: 1582
Episode: 8700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.24
Wins: 3163, Losses: 3931, Draws: 1607
Episode: 8800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.24
Wins: 3202, Losses: 3965, Draws: 1634
Episode: 8900, Win Rate: 0.37, Ep



Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.37, Epsilon: 0.011, smartmove: 0.25
Wins: 3300, Losses: 4023, Draws: 1678
Checkpoint saved at episode 9000
Episode: 9100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.25
Wins: 3325, Losses: 4078, Draws: 1698
Episode: 9200, Win Rate: 0.36, Epsilon: 0.802, smartmove: 0.26
Wins: 3352, Losses: 4134, Draws: 1715
Episode: 9300, Win Rate: 0.36, Epsilon: 0.703, smartmove: 0.26
Wins: 3383, Losses: 4188, Draws: 1730
Episode: 9400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.26
Wins: 3410, Losses: 4241, Draws: 1750
Episode: 9500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.26
Wins: 3431, Losses: 4295, Draws: 1775
Episode: 9600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.27
Wins: 3474, Losses: 4332, Draws: 1795
Episode: 9700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.27
Wins: 3509, Losses: 4374, Draws: 1818
Episode: 9800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.27
Wins: 3552, Losses: 4402, Draws: 1847
Episode: 9900, Win Rate: 0.36, Ep



Resetting epsilon to 1.0
Episode: 10000, Win Rate: 0.36, Epsilon: 0.011, smartmove: 0.28
Wins: 3641, Losses: 4459, Draws: 1901
Checkpoint saved at episode 10000
Episode: 10100, Win Rate: 0.36, Epsilon: 0.901, smartmove: 0.28
Wins: 3667, Losses: 4518, Draws: 1916
Episode: 10200, Win Rate: 0.36, Epsilon: 0.802, smartmove: 0.28
Wins: 3695, Losses: 4582, Draws: 1924
Episode: 10300, Win Rate: 0.36, Epsilon: 0.703, smartmove: 0.29
Wins: 3716, Losses: 4650, Draws: 1935
Episode: 10400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.29
Wins: 3747, Losses: 4699, Draws: 1955
Episode: 10500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.29
Wins: 3783, Losses: 4748, Draws: 1970
Episode: 10600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.29
Wins: 3823, Losses: 4786, Draws: 1992
Episode: 10700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.30
Wins: 3859, Losses: 4834, Draws: 2008
Episode: 10800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.30
Wins: 3897, Losses: 4874, Draws: 2030
Episode: 10900, Win Rat



Resetting epsilon to 1.0
Episode: 11000, Win Rate: 0.36, Epsilon: 0.011, smartmove: 0.31
Wins: 3979, Losses: 4936, Draws: 2086
Checkpoint saved at episode 11000
Episode: 11100, Win Rate: 0.36, Epsilon: 0.901, smartmove: 0.31
Wins: 3998, Losses: 5006, Draws: 2097
Episode: 11200, Win Rate: 0.36, Epsilon: 0.802, smartmove: 0.31
Wins: 4026, Losses: 5074, Draws: 2101
Episode: 11300, Win Rate: 0.36, Epsilon: 0.703, smartmove: 0.31
Wins: 4059, Losses: 5133, Draws: 2109
Episode: 11400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.32
Wins: 4092, Losses: 5189, Draws: 2120
Episode: 11500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.32
Wins: 4123, Losses: 5237, Draws: 2141
Episode: 11600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.32
Wins: 4152, Losses: 5281, Draws: 2168
Episode: 11700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.33
Wins: 4185, Losses: 5327, Draws: 2189
Episode: 11800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.33
Wins: 4221, Losses: 5359, Draws: 2221
Episode: 11900, Win Rat



Resetting epsilon to 1.0
Episode: 12000, Win Rate: 0.36, Epsilon: 0.011, smartmove: 0.33
Wins: 4292, Losses: 5421, Draws: 2288
Checkpoint saved at episode 12000
Episode: 12100, Win Rate: 0.36, Epsilon: 0.901, smartmove: 0.34
Wins: 4307, Losses: 5492, Draws: 2302
Episode: 12200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.34
Wins: 4325, Losses: 5558, Draws: 2318
Episode: 12300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.34
Wins: 4358, Losses: 5619, Draws: 2324
Episode: 12400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.34
Wins: 4382, Losses: 5674, Draws: 2345
Episode: 12500, Win Rate: 0.35, Epsilon: 0.505, smartmove: 0.35
Wins: 4418, Losses: 5715, Draws: 2368
Episode: 12600, Win Rate: 0.35, Epsilon: 0.406, smartmove: 0.35
Wins: 4443, Losses: 5766, Draws: 2392
Episode: 12700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.35
Wins: 4479, Losses: 5808, Draws: 2414
Episode: 12800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.36
Wins: 4525, Losses: 5849, Draws: 2427
Episode: 12900, Win Rat



Resetting epsilon to 1.0
Episode: 13000, Win Rate: 0.35, Epsilon: 0.011, smartmove: 0.36
Wins: 4589, Losses: 5933, Draws: 2479
Checkpoint saved at episode 13000
Episode: 13100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.36
Wins: 4609, Losses: 6003, Draws: 2489
Episode: 13200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.37
Wins: 4636, Losses: 6060, Draws: 2505
Episode: 13300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.37
Wins: 4658, Losses: 6123, Draws: 2520
Episode: 13400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.37
Wins: 4677, Losses: 6181, Draws: 2543
Episode: 13500, Win Rate: 0.35, Epsilon: 0.505, smartmove: 0.38
Wins: 4695, Losses: 6241, Draws: 2565
Episode: 13600, Win Rate: 0.35, Epsilon: 0.406, smartmove: 0.38
Wins: 4718, Losses: 6293, Draws: 2590
Episode: 13700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.38
Wins: 4742, Losses: 6343, Draws: 2616
Episode: 13800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.38
Wins: 4774, Losses: 6384, Draws: 2643
Episode: 13900, Win Rat



Resetting epsilon to 1.0
Episode: 14000, Win Rate: 0.35, Epsilon: 0.011, smartmove: 0.39
Wins: 4851, Losses: 6443, Draws: 2707
Checkpoint saved at episode 14000
Episode: 14100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.39
Wins: 4866, Losses: 6518, Draws: 2717
Episode: 14200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.39
Wins: 4891, Losses: 6577, Draws: 2733
Episode: 14300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.40
Wins: 4911, Losses: 6636, Draws: 2754
Episode: 14400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.40
Wins: 4932, Losses: 6696, Draws: 2773
Episode: 14500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.40
Wins: 4956, Losses: 6745, Draws: 2800
Episode: 14600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.41
Wins: 4989, Losses: 6790, Draws: 2822
Episode: 14700, Win Rate: 0.34, Epsilon: 0.307, smartmove: 0.41
Wins: 5012, Losses: 6838, Draws: 2851
Episode: 14800, Win Rate: 0.34, Epsilon: 0.208, smartmove: 0.41
Wins: 5039, Losses: 6882, Draws: 2880
Episode: 14900, Win Rat



Resetting epsilon to 1.0
Episode: 15000, Win Rate: 0.34, Epsilon: 0.011, smartmove: 0.42
Wins: 5107, Losses: 6942, Draws: 2952
Checkpoint saved at episode 15000
Episode: 15100, Win Rate: 0.34, Epsilon: 0.901, smartmove: 0.42
Wins: 5124, Losses: 7013, Draws: 2964
Episode: 15200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.42
Wins: 5137, Losses: 7081, Draws: 2983
Episode: 15300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.42
Wins: 5157, Losses: 7146, Draws: 2998
Episode: 15400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.43
Wins: 5181, Losses: 7204, Draws: 3016
Episode: 15500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.43
Wins: 5203, Losses: 7263, Draws: 3035
Episode: 15600, Win Rate: 0.33, Epsilon: 0.406, smartmove: 0.43
Wins: 5224, Losses: 7309, Draws: 3068
Episode: 15700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.44
Wins: 5251, Losses: 7359, Draws: 3091
Episode: 15800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.44
Wins: 5280, Losses: 7402, Draws: 3119
Episode: 15900, Win Rat



Resetting epsilon to 1.0
Episode: 16000, Win Rate: 0.33, Epsilon: 0.011, smartmove: 0.44
Wins: 5340, Losses: 7468, Draws: 3193
Checkpoint saved at episode 16000
Episode: 16100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.45
Wins: 5360, Losses: 7538, Draws: 3203
Episode: 16200, Win Rate: 0.33, Epsilon: 0.802, smartmove: 0.45
Wins: 5375, Losses: 7610, Draws: 3216
Episode: 16300, Win Rate: 0.33, Epsilon: 0.703, smartmove: 0.45
Wins: 5397, Losses: 7671, Draws: 3233
Episode: 16400, Win Rate: 0.33, Epsilon: 0.604, smartmove: 0.46
Wins: 5425, Losses: 7728, Draws: 3248
Episode: 16500, Win Rate: 0.33, Epsilon: 0.505, smartmove: 0.46
Wins: 5443, Losses: 7793, Draws: 3265
Episode: 16600, Win Rate: 0.33, Epsilon: 0.406, smartmove: 0.46
Wins: 5472, Losses: 7838, Draws: 3291
Episode: 16700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.46
Wins: 5499, Losses: 7885, Draws: 3317
Episode: 16800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.47
Wins: 5527, Losses: 7929, Draws: 3345
Episode: 16900, Win Rat



Resetting epsilon to 1.0
Episode: 17000, Win Rate: 0.33, Epsilon: 0.011, smartmove: 0.47
Wins: 5580, Losses: 7983, Draws: 3438
Checkpoint saved at episode 17000
Episode: 17100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.47
Wins: 5595, Losses: 8058, Draws: 3448
Episode: 17200, Win Rate: 0.33, Epsilon: 0.802, smartmove: 0.48
Wins: 5612, Losses: 8129, Draws: 3460
Episode: 17300, Win Rate: 0.33, Epsilon: 0.703, smartmove: 0.48
Wins: 5630, Losses: 8195, Draws: 3476
Episode: 17400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.48
Wins: 5653, Losses: 8256, Draws: 3492
Episode: 17500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.49
Wins: 5668, Losses: 8313, Draws: 3520
Episode: 17600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.49
Wins: 5695, Losses: 8368, Draws: 3538
Episode: 17700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.49
Wins: 5719, Losses: 8416, Draws: 3566
Episode: 17800, Win Rate: 0.32, Epsilon: 0.208, smartmove: 0.49
Wins: 5734, Losses: 8474, Draws: 3593
Episode: 17900, Win Rat



Resetting epsilon to 1.0
Episode: 18000, Win Rate: 0.32, Epsilon: 0.011, smartmove: 0.50
Wins: 5793, Losses: 8531, Draws: 3677
Checkpoint saved at episode 18000
Episode: 18100, Win Rate: 0.32, Epsilon: 0.901, smartmove: 0.50
Wins: 5807, Losses: 8604, Draws: 3690
Episode: 18200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.51
Wins: 5826, Losses: 8670, Draws: 3705
Episode: 18300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.51
Wins: 5843, Losses: 8738, Draws: 3720
Episode: 18400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.51
Wins: 5861, Losses: 8800, Draws: 3740
Episode: 18500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.51
Wins: 5885, Losses: 8854, Draws: 3762
Episode: 18600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.52
Wins: 5909, Losses: 8902, Draws: 3790
Episode: 18700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.52
Wins: 5933, Losses: 8952, Draws: 3816
Episode: 18800, Win Rate: 0.32, Epsilon: 0.208, smartmove: 0.52
Wins: 5965, Losses: 8991, Draws: 3845
Episode: 18900, Win Rat



Resetting epsilon to 1.0
Episode: 19000, Win Rate: 0.32, Epsilon: 0.011, smartmove: 0.53
Wins: 6025, Losses: 9043, Draws: 3933
Checkpoint saved at episode 19000
Episode: 19100, Win Rate: 0.32, Epsilon: 0.901, smartmove: 0.53
Wins: 6039, Losses: 9116, Draws: 3946
Episode: 19200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.53
Wins: 6058, Losses: 9184, Draws: 3959
Episode: 19300, Win Rate: 0.31, Epsilon: 0.703, smartmove: 0.54
Wins: 6075, Losses: 9252, Draws: 3974
Episode: 19400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.54
Wins: 6097, Losses: 9308, Draws: 3996
Episode: 19500, Win Rate: 0.31, Epsilon: 0.505, smartmove: 0.54
Wins: 6114, Losses: 9365, Draws: 4022
Episode: 19600, Win Rate: 0.31, Epsilon: 0.406, smartmove: 0.54
Wins: 6142, Losses: 9406, Draws: 4053
Episode: 19700, Win Rate: 0.31, Epsilon: 0.307, smartmove: 0.55
Wins: 6159, Losses: 9454, Draws: 4088
Episode: 19800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.55
Wins: 6184, Losses: 9502, Draws: 4115
Episode: 19900, Win Rat



Resetting epsilon to 1.0
Episode: 20000, Win Rate: 0.31, Epsilon: 0.011, smartmove: 0.56
Wins: 6249, Losses: 9557, Draws: 4195
Checkpoint saved at episode 20000
Episode: 20100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.56
Wins: 6261, Losses: 9635, Draws: 4205
Episode: 20200, Win Rate: 0.31, Epsilon: 0.802, smartmove: 0.56
Wins: 6280, Losses: 9701, Draws: 4220
Episode: 20300, Win Rate: 0.31, Epsilon: 0.703, smartmove: 0.56
Wins: 6293, Losses: 9777, Draws: 4231
Episode: 20400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.57
Wins: 6313, Losses: 9843, Draws: 4245
Episode: 20500, Win Rate: 0.31, Epsilon: 0.505, smartmove: 0.57
Wins: 6332, Losses: 9898, Draws: 4271
Episode: 20600, Win Rate: 0.31, Epsilon: 0.406, smartmove: 0.57
Wins: 6352, Losses: 9953, Draws: 4296
Episode: 20700, Win Rate: 0.31, Epsilon: 0.307, smartmove: 0.57
Wins: 6377, Losses: 9994, Draws: 4330
Episode: 20800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.58
Wins: 6398, Losses: 10038, Draws: 4365
Episode: 20900, Win Ra



Resetting epsilon to 1.0
Episode: 21000, Win Rate: 0.31, Epsilon: 0.011, smartmove: 0.58
Wins: 6453, Losses: 10103, Draws: 4445
Checkpoint saved at episode 21000
Episode: 21100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.59
Wins: 6464, Losses: 10183, Draws: 4454
Episode: 21200, Win Rate: 0.31, Epsilon: 0.802, smartmove: 0.59
Wins: 6481, Losses: 10260, Draws: 4460
Episode: 21300, Win Rate: 0.30, Epsilon: 0.703, smartmove: 0.59
Wins: 6495, Losses: 10324, Draws: 4482
Episode: 21400, Win Rate: 0.30, Epsilon: 0.604, smartmove: 0.59
Wins: 6510, Losses: 10399, Draws: 4492
Episode: 21500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.60
Wins: 6523, Losses: 10457, Draws: 4521
Episode: 21600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.60
Wins: 6543, Losses: 10507, Draws: 4551
Episode: 21700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.60
Wins: 6572, Losses: 10557, Draws: 4572
Episode: 21800, Win Rate: 0.30, Epsilon: 0.208, smartmove: 0.61
Wins: 6595, Losses: 10596, Draws: 4610
Episode: 21900



Resetting epsilon to 1.0
Episode: 22000, Win Rate: 0.30, Epsilon: 0.011, smartmove: 0.61
Wins: 6650, Losses: 10658, Draws: 4693
Checkpoint saved at episode 22000
Episode: 22100, Win Rate: 0.30, Epsilon: 0.901, smartmove: 0.61
Wins: 6661, Losses: 10735, Draws: 4705
Episode: 22200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.62
Wins: 6670, Losses: 10812, Draws: 4719
Episode: 22300, Win Rate: 0.30, Epsilon: 0.703, smartmove: 0.62
Wins: 6684, Losses: 10889, Draws: 4728
Episode: 22400, Win Rate: 0.30, Epsilon: 0.604, smartmove: 0.62
Wins: 6706, Losses: 10953, Draws: 4742
Episode: 22500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.62
Wins: 6724, Losses: 11010, Draws: 4767
Episode: 22600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.63
Wins: 6747, Losses: 11062, Draws: 4792
Episode: 22700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.63
Wins: 6766, Losses: 11105, Draws: 4830
Episode: 22800, Win Rate: 0.30, Epsilon: 0.208, smartmove: 0.63
Wins: 6783, Losses: 11146, Draws: 4872
Episode: 22900



Resetting epsilon to 1.0
Episode: 23000, Win Rate: 0.30, Epsilon: 0.011, smartmove: 0.64
Wins: 6821, Losses: 11208, Draws: 4972
Checkpoint saved at episode 23000
Episode: 23100, Win Rate: 0.30, Epsilon: 0.901, smartmove: 0.64
Wins: 6839, Losses: 11281, Draws: 4981
Episode: 23200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.64
Wins: 6856, Losses: 11345, Draws: 5000
Episode: 23300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.65
Wins: 6870, Losses: 11416, Draws: 5015
Episode: 23400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.65
Wins: 6888, Losses: 11475, Draws: 5038
Episode: 23500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.65
Wins: 6906, Losses: 11535, Draws: 5060
Episode: 23600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.66
Wins: 6928, Losses: 11584, Draws: 5089
Episode: 23700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.66
Wins: 6953, Losses: 11628, Draws: 5120
Episode: 23800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.66
Wins: 6979, Losses: 11668, Draws: 5154
Episode: 23900



Resetting epsilon to 1.0
Episode: 24000, Win Rate: 0.29, Epsilon: 0.011, smartmove: 0.67
Wins: 7040, Losses: 11719, Draws: 5242
Checkpoint saved at episode 24000
Episode: 24100, Win Rate: 0.29, Epsilon: 0.901, smartmove: 0.67
Wins: 7053, Losses: 11794, Draws: 5254
Episode: 24200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.67
Wins: 7061, Losses: 11874, Draws: 5266
Episode: 24300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.68
Wins: 7080, Losses: 11941, Draws: 5280
Episode: 24400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.68
Wins: 7096, Losses: 12003, Draws: 5302
Episode: 24500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.68
Wins: 7114, Losses: 12067, Draws: 5320
Episode: 24600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.68
Wins: 7128, Losses: 12121, Draws: 5352
Episode: 24700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.69
Wins: 7148, Losses: 12173, Draws: 5380
Episode: 24800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.69
Wins: 7175, Losses: 12203, Draws: 5423
Episode: 24900



Resetting epsilon to 1.0
Episode: 25000, Win Rate: 0.29, Epsilon: 0.011, smartmove: 0.69
Wins: 7231, Losses: 12258, Draws: 5512
Checkpoint saved at episode 25000
Episode: 25100, Win Rate: 0.29, Epsilon: 0.901, smartmove: 0.70
Wins: 7237, Losses: 12344, Draws: 5520
Episode: 25200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.70
Wins: 7251, Losses: 12415, Draws: 5535
Episode: 25300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.70
Wins: 7264, Losses: 12480, Draws: 5557
Episode: 25400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.71
Wins: 7276, Losses: 12546, Draws: 5579
Episode: 25500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.71
Wins: 7292, Losses: 12608, Draws: 5601
Episode: 25600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.71
Wins: 7311, Losses: 12658, Draws: 5632
Episode: 25700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.71
Wins: 7331, Losses: 12707, Draws: 5663
Episode: 25800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.72
Wins: 7350, Losses: 12753, Draws: 5698
Episode: 25900



Resetting epsilon to 1.0
Episode: 26000, Win Rate: 0.28, Epsilon: 0.011, smartmove: 0.72
Wins: 7401, Losses: 12821, Draws: 5779
Checkpoint saved at episode 26000
Episode: 26100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.72
Wins: 7412, Losses: 12904, Draws: 5785
Episode: 26200, Win Rate: 0.28, Epsilon: 0.802, smartmove: 0.73
Wins: 7426, Losses: 12976, Draws: 5799
Episode: 26300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.73
Wins: 7433, Losses: 13043, Draws: 5825
Episode: 26400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.73
Wins: 7447, Losses: 13111, Draws: 5843
Episode: 26500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.74
Wins: 7461, Losses: 13176, Draws: 5864
Episode: 26600, Win Rate: 0.28, Epsilon: 0.406, smartmove: 0.74
Wins: 7484, Losses: 13227, Draws: 5890
Episode: 26700, Win Rate: 0.28, Epsilon: 0.307, smartmove: 0.74
Wins: 7501, Losses: 13283, Draws: 5917
Episode: 26800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.74
Wins: 7524, Losses: 13327, Draws: 5950
Episode: 26900



Resetting epsilon to 1.0
Episode: 27000, Win Rate: 0.28, Epsilon: 0.011, smartmove: 0.75
Wins: 7566, Losses: 13377, Draws: 6058
Checkpoint saved at episode 27000
Episode: 27100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.75
Wins: 7570, Losses: 13465, Draws: 6066
Episode: 27200, Win Rate: 0.28, Epsilon: 0.802, smartmove: 0.76
Wins: 7580, Losses: 13543, Draws: 6078
Episode: 27300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.76
Wins: 7588, Losses: 13619, Draws: 6094
Episode: 27400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.76
Wins: 7601, Losses: 13684, Draws: 6116
Episode: 27500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.76
Wins: 7612, Losses: 13749, Draws: 6140
Episode: 27600, Win Rate: 0.28, Epsilon: 0.406, smartmove: 0.77
Wins: 7632, Losses: 13801, Draws: 6168
Episode: 27700, Win Rate: 0.28, Epsilon: 0.307, smartmove: 0.77
Wins: 7653, Losses: 13849, Draws: 6199
Episode: 27800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.77
Wins: 7676, Losses: 13885, Draws: 6240
Episode: 27900



Resetting epsilon to 1.0
Episode: 28000, Win Rate: 0.28, Epsilon: 0.011, smartmove: 0.78
Wins: 7714, Losses: 13941, Draws: 6346
Checkpoint saved at episode 28000
Episode: 28100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.78
Wins: 7723, Losses: 14021, Draws: 6357
Episode: 28200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.78
Wins: 7730, Losses: 14100, Draws: 6371
Episode: 28300, Win Rate: 0.27, Epsilon: 0.703, smartmove: 0.79
Wins: 7743, Losses: 14170, Draws: 6388
Episode: 28400, Win Rate: 0.27, Epsilon: 0.604, smartmove: 0.79
Wins: 7756, Losses: 14230, Draws: 6415
Episode: 28500, Win Rate: 0.27, Epsilon: 0.505, smartmove: 0.79
Wins: 7766, Losses: 14295, Draws: 6440
Episode: 28600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.79
Wins: 7784, Losses: 14349, Draws: 6468
Episode: 28700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.80
Wins: 7794, Losses: 14405, Draws: 6502
Episode: 28800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.80
Wins: 7812, Losses: 14443, Draws: 6546
Episode: 28900



Resetting epsilon to 1.0
Episode: 29000, Win Rate: 0.27, Epsilon: 0.011, smartmove: 0.81
Wins: 7856, Losses: 14505, Draws: 6640
Checkpoint saved at episode 29000
Episode: 29100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.81
Wins: 7863, Losses: 14586, Draws: 6652
Episode: 29200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.81
Wins: 7871, Losses: 14657, Draws: 6673
Episode: 29300, Win Rate: 0.27, Epsilon: 0.703, smartmove: 0.81
Wins: 7879, Losses: 14740, Draws: 6682
Episode: 29400, Win Rate: 0.27, Epsilon: 0.604, smartmove: 0.82
Wins: 7886, Losses: 14817, Draws: 6698
Episode: 29500, Win Rate: 0.27, Epsilon: 0.505, smartmove: 0.82
Wins: 7896, Losses: 14876, Draws: 6729
Episode: 29600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.82
Wins: 7912, Losses: 14934, Draws: 6755
Episode: 29700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.82
Wins: 7929, Losses: 14982, Draws: 6790
Episode: 29800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.83
Wins: 7945, Losses: 15029, Draws: 6827
Episode: 29900



Resetting epsilon to 1.0
Episode: 30000, Win Rate: 0.27, Epsilon: 0.011, smartmove: 0.83
Wins: 7985, Losses: 15099, Draws: 6917
Checkpoint saved at episode 30000
Episode: 30100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.84
Wins: 7988, Losses: 15185, Draws: 6928
Episode: 30200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.84
Wins: 7995, Losses: 15266, Draws: 6940
Episode: 30300, Win Rate: 0.26, Epsilon: 0.703, smartmove: 0.84
Wins: 8000, Losses: 15344, Draws: 6957
Episode: 30400, Win Rate: 0.26, Epsilon: 0.604, smartmove: 0.84
Wins: 8001, Losses: 15424, Draws: 6976
Episode: 30500, Win Rate: 0.26, Epsilon: 0.505, smartmove: 0.85
Wins: 8011, Losses: 15493, Draws: 6997
Episode: 30600, Win Rate: 0.26, Epsilon: 0.406, smartmove: 0.85
Wins: 8026, Losses: 15550, Draws: 7025
Episode: 30700, Win Rate: 0.26, Epsilon: 0.307, smartmove: 0.85
Wins: 8041, Losses: 15607, Draws: 7053
Episode: 30800, Win Rate: 0.26, Epsilon: 0.208, smartmove: 0.86
Wins: 8059, Losses: 15656, Draws: 7086
Episode: 30900



Resetting epsilon to 1.0
Episode: 31000, Win Rate: 0.26, Epsilon: 0.011, smartmove: 0.86
Wins: 8097, Losses: 15721, Draws: 7183
Checkpoint saved at episode 31000
Episode: 31100, Win Rate: 0.26, Epsilon: 0.901, smartmove: 0.86
Wins: 8103, Losses: 15805, Draws: 7193
Episode: 31200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.87
Wins: 8110, Losses: 15879, Draws: 7212
Episode: 31300, Win Rate: 0.26, Epsilon: 0.703, smartmove: 0.87
Wins: 8118, Losses: 15953, Draws: 7230
Episode: 31400, Win Rate: 0.26, Epsilon: 0.604, smartmove: 0.87
Wins: 8125, Losses: 16022, Draws: 7254
Episode: 31500, Win Rate: 0.26, Epsilon: 0.505, smartmove: 0.88
Wins: 8138, Losses: 16076, Draws: 7287
Episode: 31600, Win Rate: 0.26, Epsilon: 0.406, smartmove: 0.88
Wins: 8152, Losses: 16134, Draws: 7315
Episode: 31700, Win Rate: 0.26, Epsilon: 0.307, smartmove: 0.88
Wins: 8171, Losses: 16188, Draws: 7342
Episode: 31800, Win Rate: 0.26, Epsilon: 0.208, smartmove: 0.88
Wins: 8182, Losses: 16238, Draws: 7381
Episode: 31900



Resetting epsilon to 1.0
Episode: 32000, Win Rate: 0.26, Epsilon: 0.011, smartmove: 0.89
Wins: 8218, Losses: 16313, Draws: 7470
Checkpoint saved at episode 32000
Episode: 32100, Win Rate: 0.26, Epsilon: 0.901, smartmove: 0.89
Wins: 8221, Losses: 16399, Draws: 7481
Episode: 32200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.89
Wins: 8227, Losses: 16479, Draws: 7495
Episode: 32300, Win Rate: 0.25, Epsilon: 0.703, smartmove: 0.90
Wins: 8234, Losses: 16561, Draws: 7506
Episode: 32400, Win Rate: 0.25, Epsilon: 0.604, smartmove: 0.90
Wins: 8244, Losses: 16631, Draws: 7526
Episode: 32500, Win Rate: 0.25, Epsilon: 0.505, smartmove: 0.90
Wins: 8251, Losses: 16701, Draws: 7549
Episode: 32600, Win Rate: 0.25, Epsilon: 0.406, smartmove: 0.91
Wins: 8264, Losses: 16753, Draws: 7584
Episode: 32700, Win Rate: 0.25, Epsilon: 0.307, smartmove: 0.91
Wins: 8274, Losses: 16817, Draws: 7610
Episode: 32800, Win Rate: 0.25, Epsilon: 0.208, smartmove: 0.91
Wins: 8281, Losses: 16878, Draws: 7642
Episode: 32900



Resetting epsilon to 1.0
Episode: 33000, Win Rate: 0.25, Epsilon: 0.011, smartmove: 0.92
Wins: 8301, Losses: 16943, Draws: 7757
Checkpoint saved at episode 33000
Episode: 33100, Win Rate: 0.25, Epsilon: 0.901, smartmove: 0.92
Wins: 8306, Losses: 17026, Draws: 7769
Episode: 33200, Win Rate: 0.25, Epsilon: 0.802, smartmove: 0.92
Wins: 8311, Losses: 17114, Draws: 7776
Episode: 33300, Win Rate: 0.25, Epsilon: 0.703, smartmove: 0.93
Wins: 8314, Losses: 17194, Draws: 7793
Episode: 33400, Win Rate: 0.25, Epsilon: 0.604, smartmove: 0.93
Wins: 8318, Losses: 17269, Draws: 7814
Episode: 33500, Win Rate: 0.25, Epsilon: 0.505, smartmove: 0.93
Wins: 8329, Losses: 17336, Draws: 7836
Episode: 33600, Win Rate: 0.25, Epsilon: 0.406, smartmove: 0.93
Wins: 8334, Losses: 17399, Draws: 7868
Episode: 33700, Win Rate: 0.25, Epsilon: 0.307, smartmove: 0.94
Wins: 8342, Losses: 17452, Draws: 7907
Episode: 33800, Win Rate: 0.25, Epsilon: 0.208, smartmove: 0.94
Wins: 8348, Losses: 17505, Draws: 7948
Episode: 33900



Resetting epsilon to 1.0
Episode: 34000, Win Rate: 0.25, Epsilon: 0.011, smartmove: 0.94
Wins: 8370, Losses: 17598, Draws: 8033
Checkpoint saved at episode 34000
Episode: 34100, Win Rate: 0.25, Epsilon: 0.901, smartmove: 0.95
Wins: 8373, Losses: 17684, Draws: 8044
Episode: 34200, Win Rate: 0.24, Epsilon: 0.802, smartmove: 0.95
Wins: 8377, Losses: 17765, Draws: 8059
Episode: 34300, Win Rate: 0.24, Epsilon: 0.703, smartmove: 0.95
Wins: 8383, Losses: 17844, Draws: 8074
Episode: 34400, Win Rate: 0.24, Epsilon: 0.604, smartmove: 0.96
Wins: 8388, Losses: 17910, Draws: 8103
Episode: 34500, Win Rate: 0.24, Epsilon: 0.505, smartmove: 0.96
Wins: 8399, Losses: 17979, Draws: 8123
Episode: 34600, Win Rate: 0.24, Epsilon: 0.406, smartmove: 0.96
Wins: 8408, Losses: 18043, Draws: 8150
Episode: 34700, Win Rate: 0.24, Epsilon: 0.307, smartmove: 0.96
Wins: 8413, Losses: 18108, Draws: 8180
Episode: 34800, Win Rate: 0.24, Epsilon: 0.208, smartmove: 0.97
Wins: 8421, Losses: 18153, Draws: 8227
Episode: 34900



Resetting epsilon to 1.0
Episode: 35000, Win Rate: 0.24, Epsilon: 0.011, smartmove: 0.97
Wins: 8436, Losses: 18225, Draws: 8340
Checkpoint saved at episode 35000
Episode: 35100, Win Rate: 0.24, Epsilon: 0.901, smartmove: 0.97
Wins: 8437, Losses: 18312, Draws: 8352
Episode: 35200, Win Rate: 0.24, Epsilon: 0.802, smartmove: 0.98
Wins: 8440, Losses: 18394, Draws: 8367
Episode: 35300, Win Rate: 0.24, Epsilon: 0.703, smartmove: 0.98
Wins: 8441, Losses: 18478, Draws: 8382
Episode: 35400, Win Rate: 0.24, Epsilon: 0.604, smartmove: 0.98
Wins: 8445, Losses: 18562, Draws: 8394
Episode: 35500, Win Rate: 0.24, Epsilon: 0.505, smartmove: 0.99
Wins: 8447, Losses: 18636, Draws: 8418
Episode: 35600, Win Rate: 0.24, Epsilon: 0.406, smartmove: 0.99
Wins: 8450, Losses: 18697, Draws: 8454
Episode: 35700, Win Rate: 0.24, Epsilon: 0.307, smartmove: 0.99
Wins: 8457, Losses: 18753, Draws: 8491
Episode: 35800, Win Rate: 0.24, Epsilon: 0.208, smartmove: 0.99
Wins: 8463, Losses: 18806, Draws: 8532
Episode: 35900



Resetting epsilon to 1.0
Episode: 36000, Win Rate: 0.24, Epsilon: 0.011, smartmove: 1.00
Wins: 8473, Losses: 18884, Draws: 8644
Checkpoint saved at episode 36000
Episode: 36100, Win Rate: 0.23, Epsilon: 0.901, smartmove: 1.00
Wins: 8475, Losses: 18973, Draws: 8653
Episode: 36200, Win Rate: 0.23, Epsilon: 0.802, smartmove: 1.00
Wins: 8476, Losses: 19060, Draws: 8665
Episode: 36300, Win Rate: 0.23, Epsilon: 0.703, smartmove: 1.00
Wins: 8480, Losses: 19144, Draws: 8677
Episode: 36400, Win Rate: 0.23, Epsilon: 0.604, smartmove: 1.00
Wins: 8484, Losses: 19220, Draws: 8697
Episode: 36500, Win Rate: 0.23, Epsilon: 0.505, smartmove: 1.00
Wins: 8486, Losses: 19295, Draws: 8720
Episode: 36600, Win Rate: 0.23, Epsilon: 0.406, smartmove: 1.00
Wins: 8490, Losses: 19355, Draws: 8756
Episode: 36700, Win Rate: 0.23, Epsilon: 0.307, smartmove: 1.00
Wins: 8496, Losses: 19401, Draws: 8804
Episode: 36800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 1.00
Wins: 8500, Losses: 19456, Draws: 8845
Episode: 36900



Resetting epsilon to 1.0
Episode: 37000, Win Rate: 0.23, Epsilon: 0.011, smartmove: 1.00
Wins: 8503, Losses: 19529, Draws: 8969
Checkpoint saved at episode 37000
Episode: 37100, Win Rate: 0.23, Epsilon: 0.901, smartmove: 1.00
Wins: 8503, Losses: 19619, Draws: 8979
Episode: 37200, Win Rate: 0.23, Epsilon: 0.802, smartmove: 1.00
Wins: 8506, Losses: 19704, Draws: 8991
Episode: 37300, Win Rate: 0.23, Epsilon: 0.703, smartmove: 1.00
Wins: 8509, Losses: 19785, Draws: 9007
Episode: 37400, Win Rate: 0.23, Epsilon: 0.604, smartmove: 1.00
Wins: 8514, Losses: 19854, Draws: 9033
Episode: 37500, Win Rate: 0.23, Epsilon: 0.505, smartmove: 1.00
Wins: 8518, Losses: 19931, Draws: 9052
Episode: 37600, Win Rate: 0.23, Epsilon: 0.406, smartmove: 1.00
Wins: 8523, Losses: 19983, Draws: 9095
Episode: 37700, Win Rate: 0.23, Epsilon: 0.307, smartmove: 1.00
Wins: 8526, Losses: 20040, Draws: 9135
Episode: 37800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 1.00
Wins: 8533, Losses: 20093, Draws: 9175
Episode: 37900



Resetting epsilon to 1.0
Episode: 38000, Win Rate: 0.22, Epsilon: 0.011, smartmove: 1.00
Wins: 8549, Losses: 20175, Draws: 9277
Checkpoint saved at episode 38000
Episode: 38100, Win Rate: 0.22, Epsilon: 0.901, smartmove: 1.00
Wins: 8551, Losses: 20264, Draws: 9286
Episode: 38200, Win Rate: 0.22, Epsilon: 0.802, smartmove: 1.00
Wins: 8551, Losses: 20343, Draws: 9307
Episode: 38300, Win Rate: 0.22, Epsilon: 0.703, smartmove: 1.00
Wins: 8553, Losses: 20428, Draws: 9320
Episode: 38400, Win Rate: 0.22, Epsilon: 0.604, smartmove: 1.00
Wins: 8557, Losses: 20500, Draws: 9344
Episode: 38500, Win Rate: 0.22, Epsilon: 0.505, smartmove: 1.00
Wins: 8562, Losses: 20574, Draws: 9365
Episode: 38600, Win Rate: 0.22, Epsilon: 0.406, smartmove: 1.00
Wins: 8568, Losses: 20639, Draws: 9394
Episode: 38700, Win Rate: 0.22, Epsilon: 0.307, smartmove: 1.00
Wins: 8574, Losses: 20698, Draws: 9429
Episode: 38800, Win Rate: 0.22, Epsilon: 0.208, smartmove: 1.00
Wins: 8579, Losses: 20744, Draws: 9478
Episode: 38900



Resetting epsilon to 1.0
Episode: 39000, Win Rate: 0.22, Epsilon: 0.011, smartmove: 1.00
Wins: 8588, Losses: 20820, Draws: 9593
Checkpoint saved at episode 39000
Episode: 39100, Win Rate: 0.22, Epsilon: 0.901, smartmove: 1.00
Wins: 8589, Losses: 20910, Draws: 9602
Episode: 39200, Win Rate: 0.22, Epsilon: 0.802, smartmove: 1.00
Wins: 8593, Losses: 20996, Draws: 9612
Episode: 39300, Win Rate: 0.22, Epsilon: 0.703, smartmove: 1.00
Wins: 8594, Losses: 21080, Draws: 9627
Episode: 39400, Win Rate: 0.22, Epsilon: 0.604, smartmove: 1.00
Wins: 8597, Losses: 21148, Draws: 9656
Episode: 39500, Win Rate: 0.22, Epsilon: 0.505, smartmove: 1.00
Wins: 8599, Losses: 21222, Draws: 9680
Episode: 39600, Win Rate: 0.22, Epsilon: 0.406, smartmove: 1.00
Wins: 8604, Losses: 21287, Draws: 9710
Episode: 39700, Win Rate: 0.22, Epsilon: 0.307, smartmove: 1.00
Wins: 8615, Losses: 21342, Draws: 9744
Episode: 39800, Win Rate: 0.22, Epsilon: 0.208, smartmove: 1.00
Wins: 8622, Losses: 21398, Draws: 9781
Episode: 39900



Final model saved as model6_retrain_retrain.h5


In [34]:
import numpy as np
import random
import os
from collections import deque
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, model_path='model6.h5'):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model_path = model_path

        # Try to load the model if it exists, otherwise create a new one
        if os.path.exists(model_path):
            try:
                print(f'{model_path} exists. Loading the model.')
                self.model = load_model(model_path, compile=False)
                # Recompile the model with explicit loss function
                self.model.compile(
                    loss=MeanSquaredError(),
                    optimizer=Adam(learning_rate=self.learning_rate)
                )
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Creating new model instead.")
                self.model = self._build_model()
        else:
            print(f'{model_path} does not exist. Starting new training.')
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(
            loss=MeanSquaredError(),
            optimizer=Adam(learning_rate=self.learning_rate)
        )
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Use batch prediction for efficiency
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)

def train_agent(episodes=40000, model_path='model6_retrain_retrain_retrain_retrain.h5'):
    agent = SQNAgent(model_path=model_path)
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness=0
    for episode in range(episodes):
        if episode % 1000 == 0 and episode > 0:
            
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 1000) * epsilon_decay)
        
        smartness = min(1, episode / (episodes * 0.9))
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1, -0.5 * smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove: {smartness:.2f}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            try:
                agent.model.save(f'model6_retrain_retrain_retrain_episode_{episode}.h5')
                print(f"Checkpoint saved at episode {episode}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")

    # Save the final model
    try:
        agent.model.save("model6_retrain_retrain_retrain_retrain_retrain.h5")
        print(f'Final model saved as model6_retrain_retrain.h5')
    except Exception as e:
        print(f"Error saving final model: {e}")


train_agent()



model6_retrain_retrain_retrain_retrain.h5 exists. Loading the model.
Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove: 0.00
Wins: 0, Losses: 0, Draws: 1
Checkpoint saved at episode 0
Episode: 100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.00
Wins: 28, Losses: 61, Draws: 12
Episode: 200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.01
Wins: 64, Losses: 114, Draws: 23
Episode: 300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.01
Wins: 96, Losses: 170, Draws: 35
Episode: 400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.01
Wins: 142, Losses: 207, Draws: 52
Episode: 500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.01
Wins: 181, Losses: 251, Draws: 69
Episode: 600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.02
Wins: 224, Losses: 296, Draws: 81
Episode: 700, Win Rate: 0.39, Epsilon: 0.307, smartmove: 0.02
Wins: 272, Losses: 330, Draws: 99
Episode: 800, Win Rate: 0.40, Epsilon: 0.208, smartmove: 0.02
Wins: 317, Losses: 369, Draws: 115
Episode: 900, Win Rate: 0.40, Epsilon: 0.109, s



Resetting epsilon to 1.0
Episode: 1000, Win Rate: 0.40, Epsilon: 0.011, smartmove: 0.03
Wins: 396, Losses: 447, Draws: 158
Checkpoint saved at episode 1000
Episode: 1100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.03
Wins: 418, Losses: 510, Draws: 173
Episode: 1200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.03
Wins: 451, Losses: 563, Draws: 187
Episode: 1300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.04
Wins: 484, Losses: 612, Draws: 205
Episode: 1400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.04
Wins: 520, Losses: 656, Draws: 225
Episode: 1500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.04
Wins: 563, Losses: 695, Draws: 243
Episode: 1600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.04
Wins: 609, Losses: 732, Draws: 260
Episode: 1700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.05
Wins: 654, Losses: 772, Draws: 275
Episode: 1800, Win Rate: 0.39, Epsilon: 0.208, smartmove: 0.05
Wins: 694, Losses: 813, Draws: 294
Episode: 1900, Win Rate: 0.39, Epsilon: 0.109, smartmove: 0.



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.39, Epsilon: 0.011, smartmove: 0.06
Wins: 788, Losses: 861, Draws: 352
Checkpoint saved at episode 2000
Episode: 2100, Win Rate: 0.39, Epsilon: 0.901, smartmove: 0.06
Wins: 811, Losses: 927, Draws: 363
Episode: 2200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.06
Wins: 839, Losses: 983, Draws: 379
Episode: 2300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.06
Wins: 877, Losses: 1031, Draws: 393
Episode: 2400, Win Rate: 0.38, Epsilon: 0.604, smartmove: 0.07
Wins: 914, Losses: 1080, Draws: 407
Episode: 2500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.07
Wins: 947, Losses: 1126, Draws: 428
Episode: 2600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.07
Wins: 985, Losses: 1171, Draws: 445
Episode: 2700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.07
Wins: 1031, Losses: 1206, Draws: 464
Episode: 2800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.08
Wins: 1061, Losses: 1251, Draws: 489
Episode: 2900, Win Rate: 0.38, Epsilon: 0.109, smart



Resetting epsilon to 1.0
Episode: 3000, Win Rate: 0.38, Epsilon: 0.011, smartmove: 0.08
Wins: 1143, Losses: 1307, Draws: 551
Checkpoint saved at episode 3000
Episode: 3100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.09
Wins: 1170, Losses: 1370, Draws: 561
Episode: 3200, Win Rate: 0.37, Epsilon: 0.802, smartmove: 0.09
Wins: 1197, Losses: 1427, Draws: 577
Episode: 3300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.09
Wins: 1232, Losses: 1480, Draws: 589
Episode: 3400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.09
Wins: 1270, Losses: 1520, Draws: 611
Episode: 3500, Win Rate: 0.37, Epsilon: 0.505, smartmove: 0.10
Wins: 1310, Losses: 1559, Draws: 632
Episode: 3600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.10
Wins: 1350, Losses: 1601, Draws: 650
Episode: 3700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.10
Wins: 1408, Losses: 1623, Draws: 670
Episode: 3800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.11
Wins: 1451, Losses: 1658, Draws: 692
Episode: 3900, Win Rate: 0.39, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.39, Epsilon: 0.011, smartmove: 0.11
Wins: 1551, Losses: 1713, Draws: 737
Checkpoint saved at episode 4000
Episode: 4100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.11
Wins: 1576, Losses: 1776, Draws: 749
Episode: 4200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.12
Wins: 1603, Losses: 1834, Draws: 764
Episode: 4300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.12
Wins: 1629, Losses: 1891, Draws: 781
Episode: 4400, Win Rate: 0.38, Epsilon: 0.604, smartmove: 0.12
Wins: 1670, Losses: 1935, Draws: 796
Episode: 4500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.12
Wins: 1703, Losses: 1987, Draws: 811
Episode: 4600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.13
Wins: 1742, Losses: 2029, Draws: 830
Episode: 4700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.13
Wins: 1791, Losses: 2063, Draws: 847
Episode: 4800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.13
Wins: 1835, Losses: 2097, Draws: 869
Episode: 4900, Win Rate: 0.38, Epsilon: 0.



Resetting epsilon to 1.0
Episode: 5000, Win Rate: 0.39, Epsilon: 0.011, smartmove: 0.14
Wins: 1935, Losses: 2145, Draws: 921
Checkpoint saved at episode 5000
Episode: 5100, Win Rate: 0.39, Epsilon: 0.901, smartmove: 0.14
Wins: 1966, Losses: 2202, Draws: 933
Episode: 5200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.14
Wins: 1996, Losses: 2258, Draws: 947
Episode: 5300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.15
Wins: 2029, Losses: 2314, Draws: 958
Episode: 5400, Win Rate: 0.38, Epsilon: 0.604, smartmove: 0.15
Wins: 2056, Losses: 2370, Draws: 975
Episode: 5500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.15
Wins: 2097, Losses: 2418, Draws: 986
Episode: 5600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.16
Wins: 2134, Losses: 2456, Draws: 1011
Episode: 5700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.16
Wins: 2178, Losses: 2489, Draws: 1034
Episode: 5800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.16
Wins: 2216, Losses: 2522, Draws: 1063
Episode: 5900, Win Rate: 0.38, Epsilon:



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.39, Epsilon: 0.011, smartmove: 0.17
Wins: 2314, Losses: 2582, Draws: 1105
Checkpoint saved at episode 6000
Episode: 6100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.17
Wins: 2341, Losses: 2643, Draws: 1117
Episode: 6200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.17
Wins: 2373, Losses: 2689, Draws: 1139
Episode: 6300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.17
Wins: 2402, Losses: 2744, Draws: 1155
Episode: 6400, Win Rate: 0.38, Epsilon: 0.604, smartmove: 0.18
Wins: 2440, Losses: 2794, Draws: 1167
Episode: 6500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.18
Wins: 2475, Losses: 2842, Draws: 1184
Episode: 6600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.18
Wins: 2515, Losses: 2886, Draws: 1200
Episode: 6700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.19
Wins: 2556, Losses: 2926, Draws: 1219
Episode: 6800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.19
Wins: 2600, Losses: 2962, Draws: 1239
Episode: 6900, Win Rate: 0.38, Ep



Resetting epsilon to 1.0
Episode: 7000, Win Rate: 0.38, Epsilon: 0.011, smartmove: 0.19
Wins: 2693, Losses: 3017, Draws: 1291
Checkpoint saved at episode 7000
Episode: 7100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.20
Wins: 2717, Losses: 3082, Draws: 1302
Episode: 7200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.20
Wins: 2749, Losses: 3143, Draws: 1309
Episode: 7300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.20
Wins: 2771, Losses: 3198, Draws: 1332
Episode: 7400, Win Rate: 0.38, Epsilon: 0.604, smartmove: 0.21
Wins: 2801, Losses: 3248, Draws: 1352
Episode: 7500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.21
Wins: 2837, Losses: 3293, Draws: 1371
Episode: 7600, Win Rate: 0.38, Epsilon: 0.406, smartmove: 0.21
Wins: 2869, Losses: 3339, Draws: 1393
Episode: 7700, Win Rate: 0.38, Epsilon: 0.307, smartmove: 0.21
Wins: 2912, Losses: 3374, Draws: 1415
Episode: 7800, Win Rate: 0.38, Epsilon: 0.208, smartmove: 0.22
Wins: 2951, Losses: 3400, Draws: 1450
Episode: 7900, Win Rate: 0.38, Ep



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.38, Epsilon: 0.011, smartmove: 0.22
Wins: 3042, Losses: 3459, Draws: 1500
Checkpoint saved at episode 8000
Episode: 8100, Win Rate: 0.38, Epsilon: 0.901, smartmove: 0.23
Wins: 3070, Losses: 3518, Draws: 1513
Episode: 8200, Win Rate: 0.38, Epsilon: 0.802, smartmove: 0.23
Wins: 3092, Losses: 3586, Draws: 1523
Episode: 8300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.23
Wins: 3122, Losses: 3646, Draws: 1533
Episode: 8400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.23
Wins: 3148, Losses: 3698, Draws: 1555
Episode: 8500, Win Rate: 0.37, Epsilon: 0.505, smartmove: 0.24
Wins: 3177, Losses: 3746, Draws: 1578
Episode: 8600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.24
Wins: 3199, Losses: 3803, Draws: 1599
Episode: 8700, Win Rate: 0.37, Epsilon: 0.307, smartmove: 0.24
Wins: 3245, Losses: 3833, Draws: 1623
Episode: 8800, Win Rate: 0.37, Epsilon: 0.208, smartmove: 0.24
Wins: 3277, Losses: 3869, Draws: 1655
Episode: 8900, Win Rate: 0.37, Ep



Resetting epsilon to 1.0
Episode: 9000, Win Rate: 0.37, Epsilon: 0.011, smartmove: 0.25
Wins: 3372, Losses: 3925, Draws: 1704
Checkpoint saved at episode 9000
Episode: 9100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.25
Wins: 3398, Losses: 3982, Draws: 1721
Episode: 9200, Win Rate: 0.37, Epsilon: 0.802, smartmove: 0.26
Wins: 3423, Losses: 4046, Draws: 1732
Episode: 9300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.26
Wins: 3447, Losses: 4102, Draws: 1752
Episode: 9400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.26
Wins: 3469, Losses: 4156, Draws: 1776
Episode: 9500, Win Rate: 0.37, Epsilon: 0.505, smartmove: 0.26
Wins: 3495, Losses: 4212, Draws: 1794
Episode: 9600, Win Rate: 0.37, Epsilon: 0.406, smartmove: 0.27
Wins: 3517, Losses: 4262, Draws: 1822
Episode: 9700, Win Rate: 0.37, Epsilon: 0.307, smartmove: 0.27
Wins: 3555, Losses: 4292, Draws: 1854
Episode: 9800, Win Rate: 0.37, Epsilon: 0.208, smartmove: 0.27
Wins: 3595, Losses: 4332, Draws: 1874
Episode: 9900, Win Rate: 0.37, Ep



Resetting epsilon to 1.0
Episode: 10000, Win Rate: 0.37, Epsilon: 0.011, smartmove: 0.28
Wins: 3673, Losses: 4407, Draws: 1921
Checkpoint saved at episode 10000
Episode: 10100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.28
Wins: 3692, Losses: 4478, Draws: 1931
Episode: 10200, Win Rate: 0.36, Epsilon: 0.802, smartmove: 0.28
Wins: 3719, Losses: 4545, Draws: 1937
Episode: 10300, Win Rate: 0.36, Epsilon: 0.703, smartmove: 0.29
Wins: 3747, Losses: 4602, Draws: 1952
Episode: 10400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.29
Wins: 3773, Losses: 4657, Draws: 1971
Episode: 10500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.29
Wins: 3810, Losses: 4700, Draws: 1991
Episode: 10600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.29
Wins: 3848, Losses: 4744, Draws: 2009
Episode: 10700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.30
Wins: 3881, Losses: 4792, Draws: 2028
Episode: 10800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.30
Wins: 3912, Losses: 4839, Draws: 2050
Episode: 10900, Win Rat



Resetting epsilon to 1.0
Episode: 11000, Win Rate: 0.36, Epsilon: 0.011, smartmove: 0.31
Wins: 3990, Losses: 4907, Draws: 2104
Checkpoint saved at episode 11000
Episode: 11100, Win Rate: 0.36, Epsilon: 0.901, smartmove: 0.31
Wins: 4009, Losses: 4981, Draws: 2111
Episode: 11200, Win Rate: 0.36, Epsilon: 0.802, smartmove: 0.31
Wins: 4024, Losses: 5051, Draws: 2126
Episode: 11300, Win Rate: 0.36, Epsilon: 0.703, smartmove: 0.31
Wins: 4043, Losses: 5113, Draws: 2145
Episode: 11400, Win Rate: 0.36, Epsilon: 0.604, smartmove: 0.32
Wins: 4071, Losses: 5167, Draws: 2163
Episode: 11500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.32
Wins: 4096, Losses: 5220, Draws: 2185
Episode: 11600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.32
Wins: 4126, Losses: 5269, Draws: 2206
Episode: 11700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.33
Wins: 4162, Losses: 5309, Draws: 2230
Episode: 11800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.33
Wins: 4196, Losses: 5352, Draws: 2253
Episode: 11900, Win Rat



Resetting epsilon to 1.0
Episode: 12000, Win Rate: 0.36, Epsilon: 0.011, smartmove: 0.33
Wins: 4267, Losses: 5413, Draws: 2321
Checkpoint saved at episode 12000
Episode: 12100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.34
Wins: 4281, Losses: 5489, Draws: 2331
Episode: 12200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.34
Wins: 4298, Losses: 5557, Draws: 2346
Episode: 12300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.34
Wins: 4326, Losses: 5617, Draws: 2358
Episode: 12400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.34
Wins: 4354, Losses: 5674, Draws: 2373
Episode: 12500, Win Rate: 0.35, Epsilon: 0.505, smartmove: 0.35
Wins: 4385, Losses: 5724, Draws: 2392
Episode: 12600, Win Rate: 0.35, Epsilon: 0.406, smartmove: 0.35
Wins: 4422, Losses: 5771, Draws: 2408
Episode: 12700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.35
Wins: 4449, Losses: 5817, Draws: 2435
Episode: 12800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.36
Wins: 4495, Losses: 5849, Draws: 2457
Episode: 12900, Win Rat



Resetting epsilon to 1.0
Episode: 13000, Win Rate: 0.35, Epsilon: 0.011, smartmove: 0.36
Wins: 4582, Losses: 5915, Draws: 2504
Checkpoint saved at episode 13000
Episode: 13100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.36
Wins: 4604, Losses: 5980, Draws: 2517
Episode: 13200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.37
Wins: 4627, Losses: 6042, Draws: 2532
Episode: 13300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.37
Wins: 4657, Losses: 6100, Draws: 2544
Episode: 13400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.37
Wins: 4685, Losses: 6158, Draws: 2558
Episode: 13500, Win Rate: 0.35, Epsilon: 0.505, smartmove: 0.38
Wins: 4713, Losses: 6207, Draws: 2581
Episode: 13600, Win Rate: 0.35, Epsilon: 0.406, smartmove: 0.38
Wins: 4743, Losses: 6258, Draws: 2600
Episode: 13700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.38
Wins: 4777, Losses: 6298, Draws: 2626
Episode: 13800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.38
Wins: 4812, Losses: 6332, Draws: 2657
Episode: 13900, Win Rat



Resetting epsilon to 1.0
Episode: 14000, Win Rate: 0.35, Epsilon: 0.011, smartmove: 0.39
Wins: 4889, Losses: 6393, Draws: 2719
Checkpoint saved at episode 14000
Episode: 14100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.39
Wins: 4912, Losses: 6457, Draws: 2732
Episode: 14200, Win Rate: 0.35, Epsilon: 0.802, smartmove: 0.39
Wins: 4932, Losses: 6524, Draws: 2745
Episode: 14300, Win Rate: 0.35, Epsilon: 0.703, smartmove: 0.40
Wins: 4958, Losses: 6581, Draws: 2762
Episode: 14400, Win Rate: 0.35, Epsilon: 0.604, smartmove: 0.40
Wins: 4991, Losses: 6640, Draws: 2770
Episode: 14500, Win Rate: 0.35, Epsilon: 0.505, smartmove: 0.40
Wins: 5016, Losses: 6698, Draws: 2787
Episode: 14600, Win Rate: 0.35, Epsilon: 0.406, smartmove: 0.41
Wins: 5044, Losses: 6744, Draws: 2813
Episode: 14700, Win Rate: 0.35, Epsilon: 0.307, smartmove: 0.41
Wins: 5078, Losses: 6786, Draws: 2837
Episode: 14800, Win Rate: 0.35, Epsilon: 0.208, smartmove: 0.41
Wins: 5113, Losses: 6819, Draws: 2869
Episode: 14900, Win Rat



Resetting epsilon to 1.0
Episode: 15000, Win Rate: 0.35, Epsilon: 0.011, smartmove: 0.42
Wins: 5188, Losses: 6867, Draws: 2946
Checkpoint saved at episode 15000
Episode: 15100, Win Rate: 0.35, Epsilon: 0.901, smartmove: 0.42
Wins: 5210, Losses: 6928, Draws: 2963
Episode: 15200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.42
Wins: 5229, Losses: 6994, Draws: 2978
Episode: 15300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.42
Wins: 5251, Losses: 7053, Draws: 2997
Episode: 15400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.43
Wins: 5281, Losses: 7113, Draws: 3007
Episode: 15500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.43
Wins: 5312, Losses: 7166, Draws: 3023
Episode: 15600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.43
Wins: 5334, Losses: 7217, Draws: 3050
Episode: 15700, Win Rate: 0.34, Epsilon: 0.307, smartmove: 0.44
Wins: 5380, Losses: 7259, Draws: 3062
Episode: 15800, Win Rate: 0.34, Epsilon: 0.208, smartmove: 0.44
Wins: 5423, Losses: 7293, Draws: 3085
Episode: 15900, Win Rat



Resetting epsilon to 1.0
Episode: 16000, Win Rate: 0.34, Epsilon: 0.011, smartmove: 0.44
Wins: 5496, Losses: 7353, Draws: 3152
Checkpoint saved at episode 16000
Episode: 16100, Win Rate: 0.34, Epsilon: 0.901, smartmove: 0.45
Wins: 5513, Losses: 7420, Draws: 3168
Episode: 16200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.45
Wins: 5527, Losses: 7499, Draws: 3175
Episode: 16300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.45
Wins: 5551, Losses: 7560, Draws: 3190
Episode: 16400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.46
Wins: 5581, Losses: 7609, Draws: 3211
Episode: 16500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.46
Wins: 5604, Losses: 7665, Draws: 3232
Episode: 16600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.46
Wins: 5629, Losses: 7715, Draws: 3257
Episode: 16700, Win Rate: 0.34, Epsilon: 0.307, smartmove: 0.46
Wins: 5659, Losses: 7759, Draws: 3283
Episode: 16800, Win Rate: 0.34, Epsilon: 0.208, smartmove: 0.47
Wins: 5689, Losses: 7801, Draws: 3311
Episode: 16900, Win Rat



Resetting epsilon to 1.0
Episode: 17000, Win Rate: 0.34, Epsilon: 0.011, smartmove: 0.47
Wins: 5763, Losses: 7841, Draws: 3397
Checkpoint saved at episode 17000
Episode: 17100, Win Rate: 0.34, Epsilon: 0.901, smartmove: 0.47
Wins: 5775, Losses: 7921, Draws: 3405
Episode: 17200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.48
Wins: 5791, Losses: 7991, Draws: 3419
Episode: 17300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.48
Wins: 5812, Losses: 8049, Draws: 3440
Episode: 17400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.48
Wins: 5831, Losses: 8113, Draws: 3457
Episode: 17500, Win Rate: 0.33, Epsilon: 0.505, smartmove: 0.49
Wins: 5853, Losses: 8170, Draws: 3478
Episode: 17600, Win Rate: 0.33, Epsilon: 0.406, smartmove: 0.49
Wins: 5884, Losses: 8223, Draws: 3494
Episode: 17700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.49
Wins: 5907, Losses: 8268, Draws: 3526
Episode: 17800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.49
Wins: 5929, Losses: 8316, Draws: 3556
Episode: 17900, Win Rat



Resetting epsilon to 1.0
Episode: 18000, Win Rate: 0.33, Epsilon: 0.011, smartmove: 0.50
Wins: 5995, Losses: 8371, Draws: 3635
Checkpoint saved at episode 18000
Episode: 18100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.50
Wins: 6009, Losses: 8443, Draws: 3649
Episode: 18200, Win Rate: 0.33, Epsilon: 0.802, smartmove: 0.51
Wins: 6029, Losses: 8506, Draws: 3666
Episode: 18300, Win Rate: 0.33, Epsilon: 0.703, smartmove: 0.51
Wins: 6046, Losses: 8574, Draws: 3681
Episode: 18400, Win Rate: 0.33, Epsilon: 0.604, smartmove: 0.51
Wins: 6072, Losses: 8629, Draws: 3700
Episode: 18500, Win Rate: 0.33, Epsilon: 0.505, smartmove: 0.51
Wins: 6090, Losses: 8688, Draws: 3723
Episode: 18600, Win Rate: 0.33, Epsilon: 0.406, smartmove: 0.52
Wins: 6118, Losses: 8739, Draws: 3744
Episode: 18700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.52
Wins: 6154, Losses: 8777, Draws: 3770
Episode: 18800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.52
Wins: 6180, Losses: 8820, Draws: 3801
Episode: 18900, Win Rat



Resetting epsilon to 1.0
Episode: 19000, Win Rate: 0.33, Epsilon: 0.011, smartmove: 0.53
Wins: 6243, Losses: 8859, Draws: 3899
Checkpoint saved at episode 19000
Episode: 19100, Win Rate: 0.33, Epsilon: 0.901, smartmove: 0.53
Wins: 6254, Losses: 8936, Draws: 3911
Episode: 19200, Win Rate: 0.33, Epsilon: 0.802, smartmove: 0.53
Wins: 6272, Losses: 9000, Draws: 3929
Episode: 19300, Win Rate: 0.33, Epsilon: 0.703, smartmove: 0.54
Wins: 6291, Losses: 9066, Draws: 3944
Episode: 19400, Win Rate: 0.33, Epsilon: 0.604, smartmove: 0.54
Wins: 6319, Losses: 9122, Draws: 3960
Episode: 19500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.54
Wins: 6337, Losses: 9178, Draws: 3986
Episode: 19600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.54
Wins: 6354, Losses: 9237, Draws: 4010
Episode: 19700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.55
Wins: 6375, Losses: 9291, Draws: 4035
Episode: 19800, Win Rate: 0.32, Epsilon: 0.208, smartmove: 0.55
Wins: 6396, Losses: 9333, Draws: 4072
Episode: 19900, Win Rat



Resetting epsilon to 1.0
Episode: 20000, Win Rate: 0.32, Epsilon: 0.011, smartmove: 0.56
Wins: 6456, Losses: 9395, Draws: 4150
Checkpoint saved at episode 20000
Episode: 20100, Win Rate: 0.32, Epsilon: 0.901, smartmove: 0.56
Wins: 6467, Losses: 9475, Draws: 4159
Episode: 20200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.56
Wins: 6482, Losses: 9545, Draws: 4174
Episode: 20300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.56
Wins: 6495, Losses: 9611, Draws: 4195
Episode: 20400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.57
Wins: 6506, Losses: 9672, Draws: 4223
Episode: 20500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.57
Wins: 6533, Losses: 9719, Draws: 4249
Episode: 20600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.57
Wins: 6559, Losses: 9767, Draws: 4275
Episode: 20700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.57
Wins: 6584, Losses: 9808, Draws: 4309
Episode: 20800, Win Rate: 0.32, Epsilon: 0.208, smartmove: 0.58
Wins: 6616, Losses: 9845, Draws: 4340
Episode: 20900, Win Rat



Resetting epsilon to 1.0
Episode: 21000, Win Rate: 0.32, Epsilon: 0.011, smartmove: 0.58
Wins: 6684, Losses: 9890, Draws: 4427
Checkpoint saved at episode 21000
Episode: 21100, Win Rate: 0.32, Epsilon: 0.901, smartmove: 0.59
Wins: 6697, Losses: 9960, Draws: 4444
Episode: 21200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.59
Wins: 6713, Losses: 10031, Draws: 4457
Episode: 21300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.59
Wins: 6727, Losses: 10102, Draws: 4472
Episode: 21400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.59
Wins: 6740, Losses: 10162, Draws: 4499
Episode: 21500, Win Rate: 0.31, Epsilon: 0.505, smartmove: 0.60
Wins: 6763, Losses: 10212, Draws: 4526
Episode: 21600, Win Rate: 0.31, Epsilon: 0.406, smartmove: 0.60
Wins: 6787, Losses: 10261, Draws: 4553
Episode: 21700, Win Rate: 0.31, Epsilon: 0.307, smartmove: 0.60
Wins: 6815, Losses: 10304, Draws: 4582
Episode: 21800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.61
Wins: 6845, Losses: 10335, Draws: 4621
Episode: 21900, 



Resetting epsilon to 1.0
Episode: 22000, Win Rate: 0.31, Epsilon: 0.011, smartmove: 0.61
Wins: 6914, Losses: 10375, Draws: 4712
Checkpoint saved at episode 22000
Episode: 22100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.61
Wins: 6926, Losses: 10451, Draws: 4724
Episode: 22200, Win Rate: 0.31, Epsilon: 0.802, smartmove: 0.62
Wins: 6943, Losses: 10521, Draws: 4737
Episode: 22300, Win Rate: 0.31, Epsilon: 0.703, smartmove: 0.62
Wins: 6959, Losses: 10585, Draws: 4757
Episode: 22400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.62
Wins: 6979, Losses: 10648, Draws: 4774
Episode: 22500, Win Rate: 0.31, Epsilon: 0.505, smartmove: 0.62
Wins: 6994, Losses: 10705, Draws: 4802
Episode: 22600, Win Rate: 0.31, Epsilon: 0.406, smartmove: 0.63
Wins: 7016, Losses: 10759, Draws: 4826
Episode: 22700, Win Rate: 0.31, Epsilon: 0.307, smartmove: 0.63
Wins: 7055, Losses: 10796, Draws: 4850
Episode: 22800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.63
Wins: 7084, Losses: 10834, Draws: 4883
Episode: 22900



Resetting epsilon to 1.0
Episode: 23000, Win Rate: 0.31, Epsilon: 0.011, smartmove: 0.64
Wins: 7138, Losses: 10874, Draws: 4989
Checkpoint saved at episode 23000
Episode: 23100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.64
Wins: 7147, Losses: 10958, Draws: 4996
Episode: 23200, Win Rate: 0.31, Epsilon: 0.802, smartmove: 0.64
Wins: 7160, Losses: 11031, Draws: 5010
Episode: 23300, Win Rate: 0.31, Epsilon: 0.703, smartmove: 0.65
Wins: 7177, Losses: 11091, Draws: 5033
Episode: 23400, Win Rate: 0.31, Epsilon: 0.604, smartmove: 0.65
Wins: 7192, Losses: 11154, Draws: 5055
Episode: 23500, Win Rate: 0.31, Epsilon: 0.505, smartmove: 0.65
Wins: 7213, Losses: 11203, Draws: 5085
Episode: 23600, Win Rate: 0.31, Epsilon: 0.406, smartmove: 0.66
Wins: 7228, Losses: 11250, Draws: 5123
Episode: 23700, Win Rate: 0.31, Epsilon: 0.307, smartmove: 0.66
Wins: 7258, Losses: 11295, Draws: 5148
Episode: 23800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.66
Wins: 7285, Losses: 11326, Draws: 5190
Episode: 23900



Resetting epsilon to 1.0
Episode: 24000, Win Rate: 0.31, Epsilon: 0.011, smartmove: 0.67
Wins: 7348, Losses: 11373, Draws: 5280
Checkpoint saved at episode 24000
Episode: 24100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.67
Wins: 7363, Losses: 11451, Draws: 5287
Episode: 24200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.67
Wins: 7377, Losses: 11521, Draws: 5303
Episode: 24300, Win Rate: 0.30, Epsilon: 0.703, smartmove: 0.68
Wins: 7391, Losses: 11589, Draws: 5321
Episode: 24400, Win Rate: 0.30, Epsilon: 0.604, smartmove: 0.68
Wins: 7406, Losses: 11658, Draws: 5337
Episode: 24500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.68
Wins: 7431, Losses: 11710, Draws: 5360
Episode: 24600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.68
Wins: 7449, Losses: 11762, Draws: 5390
Episode: 24700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.69
Wins: 7473, Losses: 11809, Draws: 5419
Episode: 24800, Win Rate: 0.30, Epsilon: 0.208, smartmove: 0.69
Wins: 7498, Losses: 11849, Draws: 5454
Episode: 24900



Resetting epsilon to 1.0
Episode: 25000, Win Rate: 0.30, Epsilon: 0.011, smartmove: 0.69
Wins: 7543, Losses: 11899, Draws: 5559
Checkpoint saved at episode 25000
Episode: 25100, Win Rate: 0.30, Epsilon: 0.901, smartmove: 0.70
Wins: 7552, Losses: 11980, Draws: 5569
Episode: 25200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.70
Wins: 7568, Losses: 12052, Draws: 5581
Episode: 25300, Win Rate: 0.30, Epsilon: 0.703, smartmove: 0.70
Wins: 7578, Losses: 12127, Draws: 5596
Episode: 25400, Win Rate: 0.30, Epsilon: 0.604, smartmove: 0.71
Wins: 7591, Losses: 12200, Draws: 5610
Episode: 25500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.71
Wins: 7606, Losses: 12263, Draws: 5632
Episode: 25600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.71
Wins: 7623, Losses: 12321, Draws: 5657
Episode: 25700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.71
Wins: 7654, Losses: 12365, Draws: 5682
Episode: 25800, Win Rate: 0.30, Epsilon: 0.208, smartmove: 0.72
Wins: 7673, Losses: 12404, Draws: 5724
Episode: 25900



Resetting epsilon to 1.0
Episode: 26000, Win Rate: 0.30, Epsilon: 0.011, smartmove: 0.72
Wins: 7715, Losses: 12459, Draws: 5827
Checkpoint saved at episode 26000
Episode: 26100, Win Rate: 0.30, Epsilon: 0.901, smartmove: 0.72
Wins: 7730, Losses: 12536, Draws: 5835
Episode: 26200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.73
Wins: 7743, Losses: 12609, Draws: 5849
Episode: 26300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.73
Wins: 7751, Losses: 12690, Draws: 5860
Episode: 26400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.73
Wins: 7764, Losses: 12758, Draws: 5879
Episode: 26500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.74
Wins: 7774, Losses: 12819, Draws: 5908
Episode: 26600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.74
Wins: 7793, Losses: 12867, Draws: 5941
Episode: 26700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.74
Wins: 7814, Losses: 12914, Draws: 5973
Episode: 26800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.74
Wins: 7831, Losses: 12953, Draws: 6017
Episode: 26900



Resetting epsilon to 1.0
Episode: 27000, Win Rate: 0.29, Epsilon: 0.011, smartmove: 0.75
Wins: 7874, Losses: 13012, Draws: 6115
Checkpoint saved at episode 27000
Episode: 27100, Win Rate: 0.29, Epsilon: 0.901, smartmove: 0.75
Wins: 7884, Losses: 13092, Draws: 6125
Episode: 27200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.76
Wins: 7897, Losses: 13163, Draws: 6141
Episode: 27300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.76
Wins: 7911, Losses: 13228, Draws: 6162
Episode: 27400, Win Rate: 0.29, Epsilon: 0.604, smartmove: 0.76
Wins: 7931, Losses: 13286, Draws: 6184
Episode: 27500, Win Rate: 0.29, Epsilon: 0.505, smartmove: 0.76
Wins: 7943, Losses: 13351, Draws: 6207
Episode: 27600, Win Rate: 0.29, Epsilon: 0.406, smartmove: 0.77
Wins: 7959, Losses: 13401, Draws: 6241
Episode: 27700, Win Rate: 0.29, Epsilon: 0.307, smartmove: 0.77
Wins: 7974, Losses: 13442, Draws: 6285
Episode: 27800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.77
Wins: 8003, Losses: 13469, Draws: 6329
Episode: 27900



Resetting epsilon to 1.0
Episode: 28000, Win Rate: 0.29, Epsilon: 0.011, smartmove: 0.78
Wins: 8043, Losses: 13529, Draws: 6429
Checkpoint saved at episode 28000
Episode: 28100, Win Rate: 0.29, Epsilon: 0.901, smartmove: 0.78
Wins: 8053, Losses: 13609, Draws: 6439
Episode: 28200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.78
Wins: 8058, Losses: 13691, Draws: 6452
Episode: 28300, Win Rate: 0.29, Epsilon: 0.703, smartmove: 0.79
Wins: 8068, Losses: 13760, Draws: 6473
Episode: 28400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.79
Wins: 8083, Losses: 13824, Draws: 6494
Episode: 28500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.79
Wins: 8097, Losses: 13886, Draws: 6518
Episode: 28600, Win Rate: 0.28, Epsilon: 0.406, smartmove: 0.79
Wins: 8104, Losses: 13948, Draws: 6549
Episode: 28700, Win Rate: 0.28, Epsilon: 0.307, smartmove: 0.80
Wins: 8116, Losses: 13997, Draws: 6588
Episode: 28800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.80
Wins: 8128, Losses: 14049, Draws: 6624
Episode: 28900



Resetting epsilon to 1.0
Episode: 29000, Win Rate: 0.28, Epsilon: 0.011, smartmove: 0.81
Wins: 8177, Losses: 14118, Draws: 6706
Checkpoint saved at episode 29000
Episode: 29100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.81
Wins: 8187, Losses: 14195, Draws: 6719
Episode: 29200, Win Rate: 0.28, Epsilon: 0.802, smartmove: 0.81
Wins: 8189, Losses: 14279, Draws: 6733
Episode: 29300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.81
Wins: 8196, Losses: 14351, Draws: 6754
Episode: 29400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.82
Wins: 8204, Losses: 14419, Draws: 6778
Episode: 29500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.82
Wins: 8218, Losses: 14480, Draws: 6803
Episode: 29600, Win Rate: 0.28, Epsilon: 0.406, smartmove: 0.82
Wins: 8239, Losses: 14528, Draws: 6834
Episode: 29700, Win Rate: 0.28, Epsilon: 0.307, smartmove: 0.82
Wins: 8258, Losses: 14576, Draws: 6867
Episode: 29800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.83
Wins: 8284, Losses: 14623, Draws: 6894
Episode: 29900



Resetting epsilon to 1.0
Episode: 30000, Win Rate: 0.28, Epsilon: 0.011, smartmove: 0.83
Wins: 8314, Losses: 14705, Draws: 6982
Checkpoint saved at episode 30000
Episode: 30100, Win Rate: 0.28, Epsilon: 0.901, smartmove: 0.84
Wins: 8323, Losses: 14790, Draws: 6988
Episode: 30200, Win Rate: 0.28, Epsilon: 0.802, smartmove: 0.84
Wins: 8329, Losses: 14865, Draws: 7007
Episode: 30300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.84
Wins: 8341, Losses: 14933, Draws: 7027
Episode: 30400, Win Rate: 0.27, Epsilon: 0.604, smartmove: 0.84
Wins: 8348, Losses: 15007, Draws: 7046
Episode: 30500, Win Rate: 0.27, Epsilon: 0.505, smartmove: 0.85
Wins: 8361, Losses: 15072, Draws: 7068
Episode: 30600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.85
Wins: 8373, Losses: 15128, Draws: 7100
Episode: 30700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.85
Wins: 8386, Losses: 15181, Draws: 7134
Episode: 30800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.86
Wins: 8399, Losses: 15233, Draws: 7169
Episode: 30900



Resetting epsilon to 1.0
Episode: 31000, Win Rate: 0.27, Epsilon: 0.011, smartmove: 0.86
Wins: 8444, Losses: 15302, Draws: 7255
Checkpoint saved at episode 31000
Episode: 31100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.86
Wins: 8447, Losses: 15387, Draws: 7267
Episode: 31200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.87
Wins: 8455, Losses: 15468, Draws: 7278
Episode: 31300, Win Rate: 0.27, Epsilon: 0.703, smartmove: 0.87
Wins: 8460, Losses: 15538, Draws: 7303
Episode: 31400, Win Rate: 0.27, Epsilon: 0.604, smartmove: 0.87
Wins: 8467, Losses: 15606, Draws: 7328
Episode: 31500, Win Rate: 0.27, Epsilon: 0.505, smartmove: 0.88
Wins: 8474, Losses: 15673, Draws: 7354
Episode: 31600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.88
Wins: 8487, Losses: 15729, Draws: 7385
Episode: 31700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.88
Wins: 8502, Losses: 15783, Draws: 7416
Episode: 31800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.88
Wins: 8514, Losses: 15834, Draws: 7453
Episode: 31900



Resetting epsilon to 1.0
Episode: 32000, Win Rate: 0.27, Epsilon: 0.011, smartmove: 0.89
Wins: 8549, Losses: 15901, Draws: 7551
Checkpoint saved at episode 32000
Episode: 32100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.89
Wins: 8552, Losses: 15987, Draws: 7562
Episode: 32200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.89
Wins: 8556, Losses: 16064, Draws: 7581
Episode: 32300, Win Rate: 0.27, Epsilon: 0.703, smartmove: 0.90
Wins: 8563, Losses: 16144, Draws: 7594
Episode: 32400, Win Rate: 0.26, Epsilon: 0.604, smartmove: 0.90
Wins: 8569, Losses: 16214, Draws: 7618
Episode: 32500, Win Rate: 0.26, Epsilon: 0.505, smartmove: 0.90
Wins: 8580, Losses: 16281, Draws: 7640
Episode: 32600, Win Rate: 0.26, Epsilon: 0.406, smartmove: 0.91
Wins: 8586, Losses: 16346, Draws: 7669
Episode: 32700, Win Rate: 0.26, Epsilon: 0.307, smartmove: 0.91
Wins: 8602, Losses: 16397, Draws: 7702
Episode: 32800, Win Rate: 0.26, Epsilon: 0.208, smartmove: 0.91
Wins: 8609, Losses: 16447, Draws: 7745
Episode: 32900



Resetting epsilon to 1.0
Episode: 33000, Win Rate: 0.26, Epsilon: 0.011, smartmove: 0.92
Wins: 8644, Losses: 16518, Draws: 7839
Checkpoint saved at episode 33000
Episode: 33100, Win Rate: 0.26, Epsilon: 0.901, smartmove: 0.92
Wins: 8647, Losses: 16607, Draws: 7847
Episode: 33200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.92
Wins: 8655, Losses: 16685, Draws: 7861
Episode: 33300, Win Rate: 0.26, Epsilon: 0.703, smartmove: 0.93
Wins: 8661, Losses: 16762, Draws: 7878
Episode: 33400, Win Rate: 0.26, Epsilon: 0.604, smartmove: 0.93
Wins: 8667, Losses: 16830, Draws: 7904
Episode: 33500, Win Rate: 0.26, Epsilon: 0.505, smartmove: 0.93
Wins: 8678, Losses: 16895, Draws: 7928
Episode: 33600, Win Rate: 0.26, Epsilon: 0.406, smartmove: 0.93
Wins: 8683, Losses: 16959, Draws: 7959
Episode: 33700, Win Rate: 0.26, Epsilon: 0.307, smartmove: 0.94
Wins: 8691, Losses: 17013, Draws: 7997
Episode: 33800, Win Rate: 0.26, Epsilon: 0.208, smartmove: 0.94
Wins: 8701, Losses: 17057, Draws: 8043
Episode: 33900



Resetting epsilon to 1.0
Episode: 34000, Win Rate: 0.26, Epsilon: 0.011, smartmove: 0.94
Wins: 8719, Losses: 17137, Draws: 8145
Checkpoint saved at episode 34000
Episode: 34100, Win Rate: 0.26, Epsilon: 0.901, smartmove: 0.95
Wins: 8723, Losses: 17225, Draws: 8153
Episode: 34200, Win Rate: 0.26, Epsilon: 0.802, smartmove: 0.95
Wins: 8723, Losses: 17310, Draws: 8168
Episode: 34300, Win Rate: 0.25, Epsilon: 0.703, smartmove: 0.95
Wins: 8728, Losses: 17394, Draws: 8179
Episode: 34400, Win Rate: 0.25, Epsilon: 0.604, smartmove: 0.96
Wins: 8731, Losses: 17470, Draws: 8200
Episode: 34500, Win Rate: 0.25, Epsilon: 0.505, smartmove: 0.96
Wins: 8740, Losses: 17534, Draws: 8227
Episode: 34600, Win Rate: 0.25, Epsilon: 0.406, smartmove: 0.96
Wins: 8745, Losses: 17597, Draws: 8259
Episode: 34700, Win Rate: 0.25, Epsilon: 0.307, smartmove: 0.96
Wins: 8751, Losses: 17640, Draws: 8310
Episode: 34800, Win Rate: 0.25, Epsilon: 0.208, smartmove: 0.97
Wins: 8762, Losses: 17684, Draws: 8355
Episode: 34900



Resetting epsilon to 1.0
Episode: 35000, Win Rate: 0.25, Epsilon: 0.011, smartmove: 0.97
Wins: 8785, Losses: 17774, Draws: 8442
Checkpoint saved at episode 35000
Episode: 35100, Win Rate: 0.25, Epsilon: 0.901, smartmove: 0.97
Wins: 8785, Losses: 17863, Draws: 8453
Episode: 35200, Win Rate: 0.25, Epsilon: 0.802, smartmove: 0.98
Wins: 8787, Losses: 17942, Draws: 8472
Episode: 35300, Win Rate: 0.25, Epsilon: 0.703, smartmove: 0.98
Wins: 8792, Losses: 18023, Draws: 8486
Episode: 35400, Win Rate: 0.25, Epsilon: 0.604, smartmove: 0.98
Wins: 8796, Losses: 18100, Draws: 8505
Episode: 35500, Win Rate: 0.25, Epsilon: 0.505, smartmove: 0.99
Wins: 8800, Losses: 18174, Draws: 8527
Episode: 35600, Win Rate: 0.25, Epsilon: 0.406, smartmove: 0.99
Wins: 8803, Losses: 18239, Draws: 8559
Episode: 35700, Win Rate: 0.25, Epsilon: 0.307, smartmove: 0.99
Wins: 8806, Losses: 18298, Draws: 8597
Episode: 35800, Win Rate: 0.25, Epsilon: 0.208, smartmove: 0.99
Wins: 8813, Losses: 18349, Draws: 8639
Episode: 35900



Resetting epsilon to 1.0
Episode: 36000, Win Rate: 0.25, Epsilon: 0.011, smartmove: 1.00
Wins: 8822, Losses: 18419, Draws: 8760
Checkpoint saved at episode 36000
Episode: 36100, Win Rate: 0.24, Epsilon: 0.901, smartmove: 1.00
Wins: 8826, Losses: 18506, Draws: 8769
Episode: 36200, Win Rate: 0.24, Epsilon: 0.802, smartmove: 1.00
Wins: 8829, Losses: 18590, Draws: 8782
Episode: 36300, Win Rate: 0.24, Epsilon: 0.703, smartmove: 1.00
Wins: 8835, Losses: 18667, Draws: 8799
Episode: 36400, Win Rate: 0.24, Epsilon: 0.604, smartmove: 1.00
Wins: 8836, Losses: 18745, Draws: 8820
Episode: 36500, Win Rate: 0.24, Epsilon: 0.505, smartmove: 1.00
Wins: 8839, Losses: 18809, Draws: 8853
Episode: 36600, Win Rate: 0.24, Epsilon: 0.406, smartmove: 1.00
Wins: 8840, Losses: 18879, Draws: 8882
Episode: 36700, Win Rate: 0.24, Epsilon: 0.307, smartmove: 1.00
Wins: 8845, Losses: 18929, Draws: 8927
Episode: 36800, Win Rate: 0.24, Epsilon: 0.208, smartmove: 1.00
Wins: 8848, Losses: 18980, Draws: 8973
Episode: 36900



Resetting epsilon to 1.0
Episode: 37000, Win Rate: 0.24, Epsilon: 0.011, smartmove: 1.00
Wins: 8864, Losses: 19069, Draws: 9068
Checkpoint saved at episode 37000
Episode: 37100, Win Rate: 0.24, Epsilon: 0.901, smartmove: 1.00
Wins: 8864, Losses: 19154, Draws: 9083
Episode: 37200, Win Rate: 0.24, Epsilon: 0.802, smartmove: 1.00
Wins: 8864, Losses: 19240, Draws: 9097
Episode: 37300, Win Rate: 0.24, Epsilon: 0.703, smartmove: 1.00
Wins: 8864, Losses: 19323, Draws: 9114
Episode: 37400, Win Rate: 0.24, Epsilon: 0.604, smartmove: 1.00
Wins: 8865, Losses: 19400, Draws: 9136
Episode: 37500, Win Rate: 0.24, Epsilon: 0.505, smartmove: 1.00
Wins: 8866, Losses: 19471, Draws: 9164
Episode: 37600, Win Rate: 0.24, Epsilon: 0.406, smartmove: 1.00
Wins: 8868, Losses: 19530, Draws: 9203
Episode: 37700, Win Rate: 0.24, Epsilon: 0.307, smartmove: 1.00
Wins: 8874, Losses: 19590, Draws: 9237
Episode: 37800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 1.00
Wins: 8882, Losses: 19638, Draws: 9281
Episode: 37900



Resetting epsilon to 1.0
Episode: 38000, Win Rate: 0.23, Epsilon: 0.011, smartmove: 1.00
Wins: 8894, Losses: 19726, Draws: 9381
Checkpoint saved at episode 38000
Episode: 38100, Win Rate: 0.23, Epsilon: 0.901, smartmove: 1.00
Wins: 8896, Losses: 19818, Draws: 9387
Episode: 38200, Win Rate: 0.23, Epsilon: 0.802, smartmove: 1.00
Wins: 8899, Losses: 19906, Draws: 9396
Episode: 38300, Win Rate: 0.23, Epsilon: 0.703, smartmove: 1.00
Wins: 8903, Losses: 19983, Draws: 9415
Episode: 38400, Win Rate: 0.23, Epsilon: 0.604, smartmove: 1.00
Wins: 8908, Losses: 20057, Draws: 9436
Episode: 38500, Win Rate: 0.23, Epsilon: 0.505, smartmove: 1.00
Wins: 8911, Losses: 20131, Draws: 9459
Episode: 38600, Win Rate: 0.23, Epsilon: 0.406, smartmove: 1.00
Wins: 8914, Losses: 20200, Draws: 9487
Episode: 38700, Win Rate: 0.23, Epsilon: 0.307, smartmove: 1.00
Wins: 8920, Losses: 20256, Draws: 9525
Episode: 38800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 1.00
Wins: 8924, Losses: 20301, Draws: 9576
Episode: 38900



Resetting epsilon to 1.0
Episode: 39000, Win Rate: 0.23, Epsilon: 0.011, smartmove: 1.00
Wins: 8939, Losses: 20366, Draws: 9696
Checkpoint saved at episode 39000
Episode: 39100, Win Rate: 0.23, Epsilon: 0.901, smartmove: 1.00
Wins: 8941, Losses: 20455, Draws: 9705
Episode: 39200, Win Rate: 0.23, Epsilon: 0.802, smartmove: 1.00
Wins: 8944, Losses: 20542, Draws: 9715
Episode: 39300, Win Rate: 0.23, Epsilon: 0.703, smartmove: 1.00
Wins: 8946, Losses: 20624, Draws: 9731
Episode: 39400, Win Rate: 0.23, Epsilon: 0.604, smartmove: 1.00
Wins: 8950, Losses: 20702, Draws: 9749
Episode: 39500, Win Rate: 0.23, Epsilon: 0.505, smartmove: 1.00
Wins: 8952, Losses: 20769, Draws: 9780
Episode: 39600, Win Rate: 0.23, Epsilon: 0.406, smartmove: 1.00
Wins: 8955, Losses: 20833, Draws: 9813
Episode: 39700, Win Rate: 0.23, Epsilon: 0.307, smartmove: 1.00
Wins: 8958, Losses: 20894, Draws: 9849
Episode: 39800, Win Rate: 0.23, Epsilon: 0.208, smartmove: 1.00
Wins: 8963, Losses: 20945, Draws: 9893
Episode: 39900



Final model saved as model6_retrain_retrain.h5


In [24]:
import numpy as np
import random
import os
from collections import deque
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.95, model_path='model6.h5'):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.001
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.batch_size = 32
        self.replay_buffer = deque(maxlen=10000)
        self.model_path = model_path

        # Try to load the model if it exists, otherwise create a new one
        if os.path.exists(model_path):
            try:
                print(f'{model_path} exists. Loading the model.')
                self.model = load_model(model_path, compile=False)
                # Recompile the model with explicit loss function
                self.model.compile(
                    loss=MeanSquaredError(),
                    optimizer=Adam(learning_rate=self.learning_rate)
                )
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Creating new model instead.")
                self.model = self._build_model()
        else:
            print(f'{model_path} does not exist. Starting new training.')
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(
            loss=MeanSquaredError(),
            optimizer=Adam(learning_rate=self.learning_rate)
        )
        return model

    def process_state(self, state):
        """
        Converts the state from [0,1,2] format to [-1,0,1] format for better learning.
        """
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        # Use batch prediction for efficiency
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)

def train_agent(episodes=20000, model_path='model6_retrain_retrain.h5'):
    agent = SQNAgent(model_path=model_path)
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness=0
    for episode in range(episodes):
        if episode % 2000 == 0 and episode > 0:
            if episode%2000==0:
                agent.epsilon = 1.0
            smartness = min(0.8, episode / (episodes * 0.9))
            print("Resetting epsilon to 1.0")
        else:
            epsilon_decay = (initial_epsilon - min_epsilon) / decay_episodes
            agent.epsilon = max(min_epsilon, initial_epsilon - (episode % 2000) * epsilon_decay)
        
        
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1, -0.5 * smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            win_rate = history['wins'] / (episode + 1)
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Epsilon: {agent.epsilon:.3f}, smartmove: {smartness:.2f}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            try:
                agent.model.save(f'model6_retrain_special_retrain_retrain_episode_{episode}.h5')
                print(f"Checkpoint saved at episode {episode}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")

    # Save the final model
    try:
        agent.model.save("model6_retrain_special_retrain.h5")
        print(f'Final model saved as model6_retrain_special_retrain.h5')
    except Exception as e:
        print(f"Error saving final model: {e}")


train_agent()



model6_retrain_retrain.h5 exists. Loading the model.
Episode: 0, Win Rate: 0.00, Epsilon: 1.000, smartmove: 0.00
Wins: 0, Losses: 0, Draws: 1
Checkpoint saved at episode 0
Episode: 100, Win Rate: 0.34, Epsilon: 0.901, smartmove: 0.00
Wins: 34, Losses: 56, Draws: 11
Episode: 200, Win Rate: 0.33, Epsilon: 0.802, smartmove: 0.00
Wins: 67, Losses: 108, Draws: 26
Episode: 300, Win Rate: 0.38, Epsilon: 0.703, smartmove: 0.00
Wins: 114, Losses: 144, Draws: 43
Episode: 400, Win Rate: 0.39, Epsilon: 0.604, smartmove: 0.00
Wins: 155, Losses: 192, Draws: 54
Episode: 500, Win Rate: 0.38, Epsilon: 0.505, smartmove: 0.00
Wins: 188, Losses: 245, Draws: 68
Episode: 600, Win Rate: 0.40, Epsilon: 0.406, smartmove: 0.00
Wins: 242, Losses: 270, Draws: 89
Episode: 700, Win Rate: 0.42, Epsilon: 0.307, smartmove: 0.00
Wins: 295, Losses: 295, Draws: 111
Episode: 800, Win Rate: 0.43, Epsilon: 0.208, smartmove: 0.00
Wins: 343, Losses: 325, Draws: 133
Episode: 900, Win Rate: 0.43, Epsilon: 0.109, smartmove: 0.00



Episode: 1000, Win Rate: 0.44, Epsilon: 0.010, smartmove: 0.00
Wins: 437, Losses: 374, Draws: 190
Checkpoint saved at episode 1000
Episode: 1100, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.00
Wins: 501, Losses: 389, Draws: 211
Episode: 1200, Win Rate: 0.47, Epsilon: 0.010, smartmove: 0.00
Wins: 565, Losses: 409, Draws: 227
Episode: 1300, Win Rate: 0.48, Epsilon: 0.010, smartmove: 0.00
Wins: 627, Losses: 428, Draws: 246
Episode: 1400, Win Rate: 0.50, Epsilon: 0.010, smartmove: 0.00
Wins: 702, Losses: 441, Draws: 258
Episode: 1500, Win Rate: 0.51, Epsilon: 0.010, smartmove: 0.00
Wins: 764, Losses: 460, Draws: 277
Episode: 1600, Win Rate: 0.52, Epsilon: 0.010, smartmove: 0.00
Wins: 830, Losses: 476, Draws: 295
Episode: 1700, Win Rate: 0.52, Epsilon: 0.010, smartmove: 0.00
Wins: 893, Losses: 493, Draws: 315
Episode: 1800, Win Rate: 0.52, Epsilon: 0.010, smartmove: 0.00
Wins: 942, Losses: 523, Draws: 336
Episode: 1900, Win Rate: 0.52, Epsilon: 0.010, smartmove: 0.00
Wins: 984, Losses: 547



Resetting epsilon to 1.0
Episode: 2000, Win Rate: 0.51, Epsilon: 1.000, smartmove: 0.11
Wins: 1030, Losses: 579, Draws: 392
Checkpoint saved at episode 2000
Episode: 2100, Win Rate: 0.50, Epsilon: 0.901, smartmove: 0.11
Wins: 1053, Losses: 644, Draws: 404
Episode: 2200, Win Rate: 0.49, Epsilon: 0.802, smartmove: 0.11
Wins: 1081, Losses: 699, Draws: 421
Episode: 2300, Win Rate: 0.49, Epsilon: 0.703, smartmove: 0.11
Wins: 1124, Losses: 743, Draws: 434
Episode: 2400, Win Rate: 0.48, Epsilon: 0.604, smartmove: 0.11
Wins: 1157, Losses: 790, Draws: 454
Episode: 2500, Win Rate: 0.48, Epsilon: 0.505, smartmove: 0.11
Wins: 1193, Losses: 837, Draws: 471
Episode: 2600, Win Rate: 0.47, Epsilon: 0.406, smartmove: 0.11
Wins: 1220, Losses: 893, Draws: 488
Episode: 2700, Win Rate: 0.46, Epsilon: 0.307, smartmove: 0.11
Wins: 1249, Losses: 939, Draws: 513
Episode: 2800, Win Rate: 0.46, Epsilon: 0.208, smartmove: 0.11
Wins: 1285, Losses: 987, Draws: 529
Episode: 2900, Win Rate: 0.46, Epsilon: 0.109, smar



Episode: 3000, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.11
Wins: 1372, Losses: 1058, Draws: 571
Checkpoint saved at episode 3000
Episode: 3100, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.11
Wins: 1431, Losses: 1076, Draws: 594
Episode: 3200, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.11
Wins: 1476, Losses: 1108, Draws: 617
Episode: 3300, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.11
Wins: 1516, Losses: 1143, Draws: 642
Episode: 3400, Win Rate: 0.46, Epsilon: 0.010, smartmove: 0.11
Wins: 1558, Losses: 1180, Draws: 663
Episode: 3500, Win Rate: 0.45, Epsilon: 0.010, smartmove: 0.11
Wins: 1592, Losses: 1216, Draws: 693
Episode: 3600, Win Rate: 0.45, Epsilon: 0.010, smartmove: 0.11
Wins: 1628, Losses: 1252, Draws: 721
Episode: 3700, Win Rate: 0.45, Epsilon: 0.010, smartmove: 0.11
Wins: 1676, Losses: 1279, Draws: 746
Episode: 3800, Win Rate: 0.45, Epsilon: 0.010, smartmove: 0.11
Wins: 1719, Losses: 1313, Draws: 769
Episode: 3900, Win Rate: 0.45, Epsilon: 0.010, smartmove: 0.11
Wins



Resetting epsilon to 1.0
Episode: 4000, Win Rate: 0.45, Epsilon: 1.000, smartmove: 0.22
Wins: 1792, Losses: 1388, Draws: 821
Checkpoint saved at episode 4000
Episode: 4100, Win Rate: 0.44, Epsilon: 0.901, smartmove: 0.22
Wins: 1817, Losses: 1452, Draws: 832
Episode: 4200, Win Rate: 0.44, Epsilon: 0.802, smartmove: 0.22
Wins: 1841, Losses: 1515, Draws: 845
Episode: 4300, Win Rate: 0.44, Epsilon: 0.703, smartmove: 0.22
Wins: 1872, Losses: 1574, Draws: 855
Episode: 4400, Win Rate: 0.43, Epsilon: 0.604, smartmove: 0.22
Wins: 1900, Losses: 1632, Draws: 869
Episode: 4500, Win Rate: 0.43, Epsilon: 0.505, smartmove: 0.22
Wins: 1923, Losses: 1695, Draws: 883
Episode: 4600, Win Rate: 0.42, Epsilon: 0.406, smartmove: 0.22
Wins: 1949, Losses: 1753, Draws: 899
Episode: 4700, Win Rate: 0.42, Epsilon: 0.307, smartmove: 0.22
Wins: 1988, Losses: 1792, Draws: 921
Episode: 4800, Win Rate: 0.42, Epsilon: 0.208, smartmove: 0.22
Wins: 2022, Losses: 1836, Draws: 943
Episode: 4900, Win Rate: 0.42, Epsilon: 0.



Episode: 5000, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2103, Losses: 1909, Draws: 989
Checkpoint saved at episode 5000
Episode: 5100, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2137, Losses: 1942, Draws: 1022
Episode: 5200, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2170, Losses: 1978, Draws: 1053
Episode: 5300, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2201, Losses: 2012, Draws: 1088
Episode: 5400, Win Rate: 0.41, Epsilon: 0.010, smartmove: 0.22
Wins: 2232, Losses: 2051, Draws: 1118
Episode: 5500, Win Rate: 0.41, Epsilon: 0.010, smartmove: 0.22
Wins: 2274, Losses: 2087, Draws: 1140
Episode: 5600, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2326, Losses: 2114, Draws: 1161
Episode: 5700, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2376, Losses: 2141, Draws: 1184
Episode: 5800, Win Rate: 0.42, Epsilon: 0.010, smartmove: 0.22
Wins: 2409, Losses: 2170, Draws: 1222
Episode: 5900, Win Rate: 0.41, Epsilon: 0.010, smartmove: 0



Resetting epsilon to 1.0
Episode: 6000, Win Rate: 0.41, Epsilon: 1.000, smartmove: 0.33
Wins: 2468, Losses: 2245, Draws: 1288
Checkpoint saved at episode 6000
Episode: 6100, Win Rate: 0.41, Epsilon: 0.901, smartmove: 0.33
Wins: 2494, Losses: 2311, Draws: 1296
Episode: 6200, Win Rate: 0.41, Epsilon: 0.802, smartmove: 0.33
Wins: 2521, Losses: 2369, Draws: 1311
Episode: 6300, Win Rate: 0.40, Epsilon: 0.703, smartmove: 0.33
Wins: 2539, Losses: 2427, Draws: 1335
Episode: 6400, Win Rate: 0.40, Epsilon: 0.604, smartmove: 0.33
Wins: 2558, Losses: 2488, Draws: 1355
Episode: 6500, Win Rate: 0.40, Epsilon: 0.505, smartmove: 0.33
Wins: 2586, Losses: 2548, Draws: 1367
Episode: 6600, Win Rate: 0.40, Epsilon: 0.406, smartmove: 0.33
Wins: 2609, Losses: 2600, Draws: 1392
Episode: 6700, Win Rate: 0.39, Epsilon: 0.307, smartmove: 0.33
Wins: 2634, Losses: 2650, Draws: 1417
Episode: 6800, Win Rate: 0.39, Epsilon: 0.208, smartmove: 0.33
Wins: 2661, Losses: 2691, Draws: 1449
Episode: 6900, Win Rate: 0.39, Ep



Episode: 7000, Win Rate: 0.39, Epsilon: 0.010, smartmove: 0.33
Wins: 2727, Losses: 2776, Draws: 1498
Checkpoint saved at episode 7000
Episode: 7100, Win Rate: 0.39, Epsilon: 0.010, smartmove: 0.33
Wins: 2753, Losses: 2822, Draws: 1526
Episode: 7200, Win Rate: 0.39, Epsilon: 0.010, smartmove: 0.33
Wins: 2784, Losses: 2855, Draws: 1562
Episode: 7300, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2806, Losses: 2891, Draws: 1604
Episode: 7400, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2828, Losses: 2928, Draws: 1645
Episode: 7500, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2849, Losses: 2967, Draws: 1685
Episode: 7600, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2886, Losses: 2994, Draws: 1721
Episode: 7700, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2912, Losses: 3024, Draws: 1765
Episode: 7800, Win Rate: 0.38, Epsilon: 0.010, smartmove: 0.33
Wins: 2941, Losses: 3056, Draws: 1804
Episode: 7900, Win Rate: 0.38, Epsilon: 0.010, smartmove: 



Resetting epsilon to 1.0
Episode: 8000, Win Rate: 0.37, Epsilon: 1.000, smartmove: 0.44
Wins: 2993, Losses: 3122, Draws: 1886
Checkpoint saved at episode 8000
Episode: 8100, Win Rate: 0.37, Epsilon: 0.901, smartmove: 0.44
Wins: 3013, Losses: 3195, Draws: 1893
Episode: 8200, Win Rate: 0.37, Epsilon: 0.802, smartmove: 0.44
Wins: 3033, Losses: 3265, Draws: 1903
Episode: 8300, Win Rate: 0.37, Epsilon: 0.703, smartmove: 0.44
Wins: 3056, Losses: 3321, Draws: 1924
Episode: 8400, Win Rate: 0.37, Epsilon: 0.604, smartmove: 0.44
Wins: 3072, Losses: 3388, Draws: 1941
Episode: 8500, Win Rate: 0.36, Epsilon: 0.505, smartmove: 0.44
Wins: 3097, Losses: 3441, Draws: 1963
Episode: 8600, Win Rate: 0.36, Epsilon: 0.406, smartmove: 0.44
Wins: 3117, Losses: 3501, Draws: 1983
Episode: 8700, Win Rate: 0.36, Epsilon: 0.307, smartmove: 0.44
Wins: 3134, Losses: 3553, Draws: 2014
Episode: 8800, Win Rate: 0.36, Epsilon: 0.208, smartmove: 0.44
Wins: 3158, Losses: 3611, Draws: 2032
Episode: 8900, Win Rate: 0.36, Ep



Episode: 9000, Win Rate: 0.36, Epsilon: 0.010, smartmove: 0.44
Wins: 3202, Losses: 3705, Draws: 2094
Checkpoint saved at episode 9000
Episode: 9100, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3219, Losses: 3755, Draws: 2127
Episode: 9200, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3252, Losses: 3782, Draws: 2167
Episode: 9300, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3276, Losses: 3818, Draws: 2207
Episode: 9400, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3301, Losses: 3852, Draws: 2248
Episode: 9500, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3324, Losses: 3892, Draws: 2285
Episode: 9600, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3355, Losses: 3922, Draws: 2324
Episode: 9700, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3380, Losses: 3955, Draws: 2366
Episode: 9800, Win Rate: 0.35, Epsilon: 0.010, smartmove: 0.44
Wins: 3406, Losses: 3988, Draws: 2407
Episode: 9900, Win Rate: 0.35, Epsilon: 0.010, smartmove: 



Resetting epsilon to 1.0
Episode: 10000, Win Rate: 0.35, Epsilon: 1.000, smartmove: 0.56
Wins: 3460, Losses: 4053, Draws: 2488
Checkpoint saved at episode 10000
Episode: 10100, Win Rate: 0.34, Epsilon: 0.901, smartmove: 0.56
Wins: 3471, Losses: 4129, Draws: 2501
Episode: 10200, Win Rate: 0.34, Epsilon: 0.802, smartmove: 0.56
Wins: 3481, Losses: 4207, Draws: 2513
Episode: 10300, Win Rate: 0.34, Epsilon: 0.703, smartmove: 0.56
Wins: 3500, Losses: 4268, Draws: 2533
Episode: 10400, Win Rate: 0.34, Epsilon: 0.604, smartmove: 0.56
Wins: 3518, Losses: 4334, Draws: 2549
Episode: 10500, Win Rate: 0.34, Epsilon: 0.505, smartmove: 0.56
Wins: 3534, Losses: 4393, Draws: 2574
Episode: 10600, Win Rate: 0.34, Epsilon: 0.406, smartmove: 0.56
Wins: 3553, Losses: 4457, Draws: 2591
Episode: 10700, Win Rate: 0.33, Epsilon: 0.307, smartmove: 0.56
Wins: 3577, Losses: 4510, Draws: 2614
Episode: 10800, Win Rate: 0.33, Epsilon: 0.208, smartmove: 0.56
Wins: 3600, Losses: 4557, Draws: 2644
Episode: 10900, Win Rat



Episode: 11000, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3658, Losses: 4647, Draws: 2696
Checkpoint saved at episode 11000
Episode: 11100, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3694, Losses: 4678, Draws: 2729
Episode: 11200, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3726, Losses: 4706, Draws: 2769
Episode: 11300, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3746, Losses: 4736, Draws: 2819
Episode: 11400, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3775, Losses: 4771, Draws: 2855
Episode: 11500, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3797, Losses: 4811, Draws: 2893
Episode: 11600, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3818, Losses: 4849, Draws: 2934
Episode: 11700, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3832, Losses: 4894, Draws: 2975
Episode: 11800, Win Rate: 0.33, Epsilon: 0.010, smartmove: 0.56
Wins: 3858, Losses: 4928, Draws: 3015
Episode: 11900, Win Rate: 0.33, Epsilon: 0.010, 



Resetting epsilon to 1.0
Episode: 12000, Win Rate: 0.33, Epsilon: 1.000, smartmove: 0.67
Wins: 3906, Losses: 4990, Draws: 3105
Checkpoint saved at episode 12000
Episode: 12100, Win Rate: 0.32, Epsilon: 0.901, smartmove: 0.67
Wins: 3920, Losses: 5064, Draws: 3117
Episode: 12200, Win Rate: 0.32, Epsilon: 0.802, smartmove: 0.67
Wins: 3926, Losses: 5148, Draws: 3127
Episode: 12300, Win Rate: 0.32, Epsilon: 0.703, smartmove: 0.67
Wins: 3942, Losses: 5222, Draws: 3137
Episode: 12400, Win Rate: 0.32, Epsilon: 0.604, smartmove: 0.67
Wins: 3955, Losses: 5289, Draws: 3157
Episode: 12500, Win Rate: 0.32, Epsilon: 0.505, smartmove: 0.67
Wins: 3968, Losses: 5361, Draws: 3172
Episode: 12600, Win Rate: 0.32, Epsilon: 0.406, smartmove: 0.67
Wins: 3983, Losses: 5424, Draws: 3194
Episode: 12700, Win Rate: 0.32, Epsilon: 0.307, smartmove: 0.67
Wins: 4002, Losses: 5467, Draws: 3232
Episode: 12800, Win Rate: 0.31, Epsilon: 0.208, smartmove: 0.67
Wins: 4020, Losses: 5510, Draws: 3271
Episode: 12900, Win Rat



Episode: 13000, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4060, Losses: 5604, Draws: 3337
Checkpoint saved at episode 13000
Episode: 13100, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4083, Losses: 5637, Draws: 3381
Episode: 13200, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4105, Losses: 5668, Draws: 3428
Episode: 13300, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4128, Losses: 5702, Draws: 3471
Episode: 13400, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4155, Losses: 5740, Draws: 3506
Episode: 13500, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4178, Losses: 5778, Draws: 3545
Episode: 13600, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4204, Losses: 5817, Draws: 3580
Episode: 13700, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4225, Losses: 5858, Draws: 3618
Episode: 13800, Win Rate: 0.31, Epsilon: 0.010, smartmove: 0.67
Wins: 4254, Losses: 5898, Draws: 3649
Episode: 13900, Win Rate: 0.31, Epsilon: 0.010, 



Resetting epsilon to 1.0
Episode: 14000, Win Rate: 0.31, Epsilon: 1.000, smartmove: 0.78
Wins: 4296, Losses: 5980, Draws: 3725
Checkpoint saved at episode 14000
Episode: 14100, Win Rate: 0.31, Epsilon: 0.901, smartmove: 0.78
Wins: 4301, Losses: 6064, Draws: 3736
Episode: 14200, Win Rate: 0.30, Epsilon: 0.802, smartmove: 0.78
Wins: 4306, Losses: 6150, Draws: 3745
Episode: 14300, Win Rate: 0.30, Epsilon: 0.703, smartmove: 0.78
Wins: 4317, Losses: 6223, Draws: 3761
Episode: 14400, Win Rate: 0.30, Epsilon: 0.604, smartmove: 0.78
Wins: 4319, Losses: 6304, Draws: 3778
Episode: 14500, Win Rate: 0.30, Epsilon: 0.505, smartmove: 0.78
Wins: 4329, Losses: 6377, Draws: 3795
Episode: 14600, Win Rate: 0.30, Epsilon: 0.406, smartmove: 0.78
Wins: 4343, Losses: 6448, Draws: 3810
Episode: 14700, Win Rate: 0.30, Epsilon: 0.307, smartmove: 0.78
Wins: 4355, Losses: 6512, Draws: 3834
Episode: 14800, Win Rate: 0.29, Epsilon: 0.208, smartmove: 0.78
Wins: 4365, Losses: 6567, Draws: 3869
Episode: 14900, Win Rat



Episode: 15000, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4407, Losses: 6658, Draws: 3936
Checkpoint saved at episode 15000
Episode: 15100, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4429, Losses: 6695, Draws: 3977
Episode: 15200, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4447, Losses: 6736, Draws: 4018
Episode: 15300, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4469, Losses: 6774, Draws: 4058
Episode: 15400, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4485, Losses: 6813, Draws: 4103
Episode: 15500, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4502, Losses: 6855, Draws: 4144
Episode: 15600, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4526, Losses: 6891, Draws: 4184
Episode: 15700, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4547, Losses: 6930, Draws: 4224
Episode: 15800, Win Rate: 0.29, Epsilon: 0.010, smartmove: 0.78
Wins: 4565, Losses: 6965, Draws: 4271
Episode: 15900, Win Rate: 0.29, Epsilon: 0.010, 



Resetting epsilon to 1.0
Episode: 16000, Win Rate: 0.29, Epsilon: 1.000, smartmove: 0.80
Wins: 4606, Losses: 7037, Draws: 4358
Checkpoint saved at episode 16000
Episode: 16100, Win Rate: 0.29, Epsilon: 0.901, smartmove: 0.80
Wins: 4615, Losses: 7120, Draws: 4366
Episode: 16200, Win Rate: 0.29, Epsilon: 0.802, smartmove: 0.80
Wins: 4618, Losses: 7201, Draws: 4382
Episode: 16300, Win Rate: 0.28, Epsilon: 0.703, smartmove: 0.80
Wins: 4630, Losses: 7270, Draws: 4401
Episode: 16400, Win Rate: 0.28, Epsilon: 0.604, smartmove: 0.80
Wins: 4635, Losses: 7347, Draws: 4419
Episode: 16500, Win Rate: 0.28, Epsilon: 0.505, smartmove: 0.80
Wins: 4642, Losses: 7418, Draws: 4441
Episode: 16600, Win Rate: 0.28, Epsilon: 0.406, smartmove: 0.80
Wins: 4656, Losses: 7483, Draws: 4462
Episode: 16700, Win Rate: 0.28, Epsilon: 0.307, smartmove: 0.80
Wins: 4673, Losses: 7528, Draws: 4500
Episode: 16800, Win Rate: 0.28, Epsilon: 0.208, smartmove: 0.80
Wins: 4684, Losses: 7572, Draws: 4545
Episode: 16900, Win Rat



Episode: 17000, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4737, Losses: 7636, Draws: 4628
Checkpoint saved at episode 17000
Episode: 17100, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4757, Losses: 7664, Draws: 4680
Episode: 17200, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4777, Losses: 7689, Draws: 4735
Episode: 17300, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4796, Losses: 7719, Draws: 4786
Episode: 17400, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4811, Losses: 7751, Draws: 4839
Episode: 17500, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4833, Losses: 7782, Draws: 4886
Episode: 17600, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4856, Losses: 7805, Draws: 4940
Episode: 17700, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4875, Losses: 7831, Draws: 4995
Episode: 17800, Win Rate: 0.28, Epsilon: 0.010, smartmove: 0.80
Wins: 4902, Losses: 7853, Draws: 5046
Episode: 17900, Win Rate: 0.27, Epsilon: 0.010, 



Resetting epsilon to 1.0
Episode: 18000, Win Rate: 0.27, Epsilon: 1.000, smartmove: 0.80
Wins: 4946, Losses: 7922, Draws: 5133
Checkpoint saved at episode 18000
Episode: 18100, Win Rate: 0.27, Epsilon: 0.901, smartmove: 0.80
Wins: 4956, Losses: 7999, Draws: 5146
Episode: 18200, Win Rate: 0.27, Epsilon: 0.802, smartmove: 0.80
Wins: 4965, Losses: 8073, Draws: 5163
Episode: 18300, Win Rate: 0.27, Epsilon: 0.703, smartmove: 0.80
Wins: 4975, Losses: 8145, Draws: 5181
Episode: 18400, Win Rate: 0.27, Epsilon: 0.604, smartmove: 0.80
Wins: 4984, Losses: 8217, Draws: 5200
Episode: 18500, Win Rate: 0.27, Epsilon: 0.505, smartmove: 0.80
Wins: 4989, Losses: 8287, Draws: 5225
Episode: 18600, Win Rate: 0.27, Epsilon: 0.406, smartmove: 0.80
Wins: 4996, Losses: 8354, Draws: 5251
Episode: 18700, Win Rate: 0.27, Epsilon: 0.307, smartmove: 0.80
Wins: 5009, Losses: 8405, Draws: 5287
Episode: 18800, Win Rate: 0.27, Epsilon: 0.208, smartmove: 0.80
Wins: 5028, Losses: 8453, Draws: 5320
Episode: 18900, Win Rat



Episode: 19000, Win Rate: 0.27, Epsilon: 0.010, smartmove: 0.80
Wins: 5063, Losses: 8516, Draws: 5422
Checkpoint saved at episode 19000
Episode: 19100, Win Rate: 0.27, Epsilon: 0.010, smartmove: 0.80
Wins: 5086, Losses: 8540, Draws: 5475
Episode: 19200, Win Rate: 0.27, Epsilon: 0.010, smartmove: 0.80
Wins: 5109, Losses: 8563, Draws: 5529
Episode: 19300, Win Rate: 0.27, Epsilon: 0.010, smartmove: 0.80
Wins: 5129, Losses: 8582, Draws: 5590
Episode: 19400, Win Rate: 0.27, Epsilon: 0.010, smartmove: 0.80
Wins: 5153, Losses: 8605, Draws: 5643
Episode: 19500, Win Rate: 0.26, Epsilon: 0.010, smartmove: 0.80
Wins: 5166, Losses: 8630, Draws: 5705
Episode: 19600, Win Rate: 0.26, Epsilon: 0.010, smartmove: 0.80
Wins: 5193, Losses: 8650, Draws: 5758
Episode: 19700, Win Rate: 0.26, Epsilon: 0.010, smartmove: 0.80
Wins: 5211, Losses: 8677, Draws: 5813
Episode: 19800, Win Rate: 0.26, Epsilon: 0.010, smartmove: 0.80
Wins: 5245, Losses: 8702, Draws: 5854
Episode: 19900, Win Rate: 0.26, Epsilon: 0.010, 



Final model saved as model6_retrain_special_retrain.h5


In [29]:
import numpy as np
import random
import os
from collections import deque
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from TicTacToe import TicTacToe

class SQNAgent:
    def __init__(self, state_size=9, action_size=9, gamma=0.99, model_path='model6_retrain.h5'):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma  
        self.learning_rate = 0.0005
        self.epsilon = 1.0
        self.epsilon_min = 0.01  # Keeping original minimum epsilon
        self.epsilon_decay = 0.9994
        self.batch_size = 64
        self.replay_buffer = deque(maxlen=50000)
        self.model_path = model_path

        if os.path.exists(model_path):
            try:
                print(f'{model_path} exists. Loading the model.')
                self.model = load_model(model_path, compile=False)
                self.model.compile(
                    loss=MeanSquaredError(),
                    optimizer=Adam(learning_rate=self.learning_rate)
                )
            except Exception as e:
                print(f"Error loading model: {e}")
                print("Creating new model instead.")
                self.model = self._build_model()
        else:
            print(f'{model_path} does not exist. Starting new training.')
            self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(64, input_dim=self.state_size, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(
            loss=MeanSquaredError(),
            optimizer=Adam(learning_rate=self.learning_rate)
        )
        return model

    def process_state(self, state):
        processed_state = np.array(state).copy()
        processed_state[processed_state == 0] = -1
        processed_state[processed_state == 1] = 0
        processed_state[processed_state == 2] = 1
        return processed_state

    def select_action(self, state, valid_actions):
        if not valid_actions:
            return None

        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        processed_state = self.process_state(state)
        q_values = self.model.predict(processed_state.reshape(1, -1), verbose=0)[0]
        
        # Prioritize center and corner moves when Q-values are close
        if len(valid_actions) > 1:
            preferred_moves = [4] if 4 in valid_actions else []  # Center
            preferred_moves.extend([0, 2, 6, 8] for pos in [0, 2, 6, 8] if pos in valid_actions)  # Corners
            
            max_q = max(q_values[action] for action in valid_actions)
            good_actions = [action for action in valid_actions 
                          if q_values[action] > max_q - 0.1]
            
            for preferred in preferred_moves:
                if preferred in good_actions:
                    return preferred

        valid_q_values = [(q_values[action], action) for action in valid_actions]
        return max(valid_q_values, key=lambda x: x[0])[1]

    def store_experience(self, state, action, reward, next_state, done):
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self):
        if len(self.replay_buffer) < self.batch_size:
            return

        mini_batch = random.sample(self.replay_buffer, self.batch_size)
        states = np.array([self.process_state(exp[0]) for exp in mini_batch])
        next_states = np.array([self.process_state(exp[3]) for exp in mini_batch])
        
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.model.predict(next_states, verbose=0)

        x = []
        y = []

        for i, (state, action, reward, next_state, done) in enumerate(mini_batch):
            if done:
                target = reward
            else:
                next_valid_actions = [j for j, val in enumerate(next_state) if val == 0]
                if next_valid_actions:
                    max_next_q = max(next_q_values[i][action] for action in next_valid_actions)
                    target = reward + self.gamma * max_next_q
                else:
                    target = reward
            
            current_q = current_q_values[i].copy()
            current_q[action] = target
            x.append(self.process_state(state))
            y.append(current_q)

        self.model.fit(np.array(x), np.array(y), batch_size=self.batch_size, epochs=1, verbose=0)

def train_agent(episodes=20000, model_path='model6_retrain_special_retrain.h5'):
    agent = SQNAgent(model_path=model_path)
    history = {'wins': 0, 'losses': 0, 'draws': 0}
    initial_epsilon = 1.0
    min_epsilon = 0.01
    decay_episodes = 1000
    smartness = 0
    
    for episode in range(episodes):
        # Original exploration strategy
        if episode % 5000== 0 and episode > 0:
            if episode % 5000== 0:
                agent.epsilon = 1.0
            smartness = min(0.8, episode / (episodes * 0.8))  # Cap at 0.8 smartness
            print("Resetting epsilon to 1.0")
        else:
            agent.epsilon = max(min_epsilon, agent.epsilon*agent.epsilon_decay)
        
        game = TicTacToe(smartMovePlayer1=smartness)
        state = np.array(game.board)
        
        game.player1_move()
        state = np.array(game.board)
        
        while True:
            valid_actions = game.empty_positions()
            if not valid_actions:
                history['draws'] += 1
                break
                
            action = agent.select_action(state, valid_actions)
            game.make_move(action, player=2)
            
            reward = 0
            if game.current_winner == 2:
                reward = 1.0
                history['wins'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            game.player1_move()
            if game.current_winner == 1:
                reward = -1.0
                history['losses'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            elif game.is_full():
                reward = min(-0.1, -0.5 * smartness)
                history['draws'] += 1
                agent.store_experience(state, action, reward, game.board, True)
                break
            
            agent.store_experience(state, action, reward, game.board, False)
            state = np.array(game.board)
        
        agent.train()
        
        if episode % 100 == 0:
            recent_games = 1000
            recent_start = max(0, episode - recent_games)
            recent_total = episode - recent_start + 1
            win_rate = (history['wins'] - (history['wins'] // recent_total)) / recent_total
            print(f"Episode: {episode}, Recent Win Rate: {win_rate:.2f}, "
                  f"Epsilon: {agent.epsilon:.3f}, Smartness: {smartness:.2f}")
            print(f"Wins: {history['wins']}, Losses: {history['losses']}, Draws: {history['draws']}")
        
        if episode % 1000 == 0:
            try:
                agent.model.save(f'model6_retrain_optimized_episode_{episode}.h5')
                print(f"Checkpoint saved at episode {episode}")
            except Exception as e:
                print(f"Error saving checkpoint: {e}")

    try:
        agent.model.save("modle16_final_final.h5")
        print(f'Final model saved as modle16_final_final.h5')
    except Exception as e:
        print(f"Error saving final model: {e}")

if __name__ == "__main__":
    train_agent()



model6_retrain_special_retrain.h5 exists. Loading the model.
Episode: 0, Recent Win Rate: 0.00, Epsilon: 0.999, Smartness: 0.00
Wins: 1, Losses: 0, Draws: 0
Checkpoint saved at episode 0
Episode: 100, Recent Win Rate: 0.31, Epsilon: 0.941, Smartness: 0.00
Wins: 31, Losses: 60, Draws: 10
Episode: 200, Recent Win Rate: 0.35, Epsilon: 0.886, Smartness: 0.00
Wins: 71, Losses: 105, Draws: 25
Episode: 300, Recent Win Rate: 0.36, Epsilon: 0.835, Smartness: 0.00
Wins: 108, Losses: 150, Draws: 43
Episode: 400, Recent Win Rate: 0.36, Epsilon: 0.786, Smartness: 0.00
Wins: 146, Losses: 203, Draws: 52
Episode: 500, Recent Win Rate: 0.38, Epsilon: 0.740, Smartness: 0.00
Wins: 188, Losses: 245, Draws: 68
Episode: 600, Recent Win Rate: 0.37, Epsilon: 0.697, Smartness: 0.00
Wins: 225, Losses: 285, Draws: 91
Episode: 700, Recent Win Rate: 0.40, Epsilon: 0.657, Smartness: 0.00
Wins: 278, Losses: 319, Draws: 104
Episode: 800, Recent Win Rate: 0.39, Epsilon: 0.618, Smartness: 0.00
Wins: 316, Losses: 363, D



Episode: 1000, Recent Win Rate: 0.41, Epsilon: 0.548, Smartness: 0.00
Wins: 414, Losses: 436, Draws: 151
Checkpoint saved at episode 1000
Episode: 1100, Recent Win Rate: 0.46, Epsilon: 0.516, Smartness: 0.00
Wins: 464, Losses: 474, Draws: 163
Episode: 1200, Recent Win Rate: 0.52, Epsilon: 0.486, Smartness: 0.00
Wins: 521, Losses: 505, Draws: 175
Episode: 1300, Recent Win Rate: 0.58, Epsilon: 0.458, Smartness: 0.00
Wins: 584, Losses: 524, Draws: 193
Episode: 1400, Recent Win Rate: 0.63, Epsilon: 0.431, Smartness: 0.00
Wins: 633, Losses: 559, Draws: 209
Episode: 1500, Recent Win Rate: 0.69, Epsilon: 0.406, Smartness: 0.00
Wins: 691, Losses: 582, Draws: 228
Episode: 1600, Recent Win Rate: 0.75, Epsilon: 0.383, Smartness: 0.00
Wins: 748, Losses: 609, Draws: 244
Episode: 1700, Recent Win Rate: 0.81, Epsilon: 0.360, Smartness: 0.00
Wins: 810, Losses: 636, Draws: 255
Episode: 1800, Recent Win Rate: 0.87, Epsilon: 0.339, Smartness: 0.00
Wins: 869, Losses: 667, Draws: 265
Episode: 1900, Recent 



Episode: 2000, Recent Win Rate: 0.97, Epsilon: 0.301, Smartness: 0.00
Wins: 971, Losses: 726, Draws: 304
Checkpoint saved at episode 2000
Episode: 2100, Recent Win Rate: 1.03, Epsilon: 0.283, Smartness: 0.00
Wins: 1031, Losses: 753, Draws: 317
Episode: 2200, Recent Win Rate: 1.09, Epsilon: 0.267, Smartness: 0.00
Wins: 1088, Losses: 779, Draws: 334
Episode: 2300, Recent Win Rate: 1.15, Epsilon: 0.251, Smartness: 0.00
Wins: 1149, Losses: 801, Draws: 351
Episode: 2400, Recent Win Rate: 1.22, Epsilon: 0.237, Smartness: 0.00
Wins: 1220, Losses: 820, Draws: 361
Episode: 2500, Recent Win Rate: 1.28, Epsilon: 0.223, Smartness: 0.00
Wins: 1285, Losses: 844, Draws: 372
Episode: 2600, Recent Win Rate: 1.34, Epsilon: 0.210, Smartness: 0.00
Wins: 1345, Losses: 868, Draws: 388
Episode: 2700, Recent Win Rate: 1.41, Epsilon: 0.198, Smartness: 0.00
Wins: 1412, Losses: 886, Draws: 403
Episode: 2800, Recent Win Rate: 1.48, Epsilon: 0.186, Smartness: 0.00
Wins: 1479, Losses: 902, Draws: 420
Episode: 2900,



Episode: 3000, Recent Win Rate: 1.62, Epsilon: 0.165, Smartness: 0.00
Wins: 1624, Losses: 934, Draws: 443
Checkpoint saved at episode 3000
Episode: 3100, Recent Win Rate: 1.68, Epsilon: 0.155, Smartness: 0.00
Wins: 1681, Losses: 957, Draws: 463
Episode: 3200, Recent Win Rate: 1.74, Epsilon: 0.146, Smartness: 0.00
Wins: 1744, Losses: 971, Draws: 486
Episode: 3300, Recent Win Rate: 1.81, Epsilon: 0.138, Smartness: 0.00
Wins: 1808, Losses: 986, Draws: 507
Episode: 3400, Recent Win Rate: 1.87, Epsilon: 0.130, Smartness: 0.00
Wins: 1868, Losses: 1002, Draws: 531
Episode: 3500, Recent Win Rate: 1.93, Epsilon: 0.122, Smartness: 0.00
Wins: 1929, Losses: 1024, Draws: 548
Episode: 3600, Recent Win Rate: 2.00, Epsilon: 0.115, Smartness: 0.00
Wins: 1998, Losses: 1037, Draws: 566
Episode: 3700, Recent Win Rate: 2.06, Epsilon: 0.108, Smartness: 0.00
Wins: 2062, Losses: 1057, Draws: 582
Episode: 3800, Recent Win Rate: 2.12, Epsilon: 0.102, Smartness: 0.00
Wins: 2125, Losses: 1075, Draws: 601
Episode:



Episode: 4000, Recent Win Rate: 2.26, Epsilon: 0.091, Smartness: 0.00
Wins: 2261, Losses: 1106, Draws: 634
Checkpoint saved at episode 4000
Episode: 4100, Recent Win Rate: 2.32, Epsilon: 0.085, Smartness: 0.00
Wins: 2325, Losses: 1124, Draws: 652
Episode: 4200, Recent Win Rate: 2.38, Epsilon: 0.080, Smartness: 0.00
Wins: 2389, Losses: 1139, Draws: 673
Episode: 4300, Recent Win Rate: 2.45, Epsilon: 0.076, Smartness: 0.00
Wins: 2453, Losses: 1152, Draws: 696
Episode: 4400, Recent Win Rate: 2.50, Epsilon: 0.071, Smartness: 0.00
Wins: 2507, Losses: 1176, Draws: 718
Episode: 4500, Recent Win Rate: 2.57, Epsilon: 0.067, Smartness: 0.00
Wins: 2574, Losses: 1188, Draws: 739
Episode: 4600, Recent Win Rate: 2.63, Epsilon: 0.063, Smartness: 0.00
Wins: 2639, Losses: 1205, Draws: 757
Episode: 4700, Recent Win Rate: 2.69, Epsilon: 0.060, Smartness: 0.00
Wins: 2699, Losses: 1222, Draws: 780
Episode: 4800, Recent Win Rate: 2.75, Epsilon: 0.056, Smartness: 0.00
Wins: 2755, Losses: 1242, Draws: 804
Epis



Resetting epsilon to 1.0
Episode: 5000, Recent Win Rate: 2.87, Epsilon: 1.000, Smartness: 0.31
Wins: 2872, Losses: 1275, Draws: 854
Checkpoint saved at episode 5000
Episode: 5100, Recent Win Rate: 2.89, Epsilon: 0.942, Smartness: 0.31
Wins: 2890, Losses: 1344, Draws: 867
Episode: 5200, Recent Win Rate: 2.91, Epsilon: 0.887, Smartness: 0.31
Wins: 2913, Losses: 1406, Draws: 882
Episode: 5300, Recent Win Rate: 2.92, Epsilon: 0.835, Smartness: 0.31
Wins: 2927, Losses: 1476, Draws: 898
Episode: 5400, Recent Win Rate: 2.95, Epsilon: 0.787, Smartness: 0.31
Wins: 2956, Losses: 1536, Draws: 909
Episode: 5500, Recent Win Rate: 2.98, Epsilon: 0.741, Smartness: 0.31
Wins: 2987, Losses: 1590, Draws: 924
Episode: 5600, Recent Win Rate: 3.00, Epsilon: 0.698, Smartness: 0.31
Wins: 3010, Losses: 1655, Draws: 936
Episode: 5700, Recent Win Rate: 3.04, Epsilon: 0.657, Smartness: 0.31
Wins: 3042, Losses: 1709, Draws: 950
Episode: 5800, Recent Win Rate: 3.07, Epsilon: 0.619, Smartness: 0.31
Wins: 3073, Loss



Episode: 6000, Recent Win Rate: 3.12, Epsilon: 0.549, Smartness: 0.31
Wins: 3127, Losses: 1868, Draws: 1006
Checkpoint saved at episode 6000
Episode: 6100, Recent Win Rate: 3.16, Epsilon: 0.517, Smartness: 0.31
Wins: 3163, Losses: 1913, Draws: 1025
Episode: 6200, Recent Win Rate: 3.19, Epsilon: 0.487, Smartness: 0.31
Wins: 3195, Losses: 1959, Draws: 1047
Episode: 6300, Recent Win Rate: 3.22, Epsilon: 0.458, Smartness: 0.31
Wins: 3226, Losses: 2003, Draws: 1072
Episode: 6400, Recent Win Rate: 3.25, Epsilon: 0.432, Smartness: 0.31
Wins: 3255, Losses: 2058, Draws: 1088
Episode: 6500, Recent Win Rate: 3.28, Epsilon: 0.406, Smartness: 0.31
Wins: 3290, Losses: 2102, Draws: 1109
Episode: 6600, Recent Win Rate: 3.32, Epsilon: 0.383, Smartness: 0.31
Wins: 3328, Losses: 2146, Draws: 1127
Episode: 6700, Recent Win Rate: 3.36, Epsilon: 0.360, Smartness: 0.31
Wins: 3364, Losses: 2190, Draws: 1147
Episode: 6800, Recent Win Rate: 3.39, Epsilon: 0.339, Smartness: 0.31
Wins: 3395, Losses: 2237, Draws: 



Episode: 7000, Recent Win Rate: 3.45, Epsilon: 0.301, Smartness: 0.31
Wins: 3460, Losses: 2325, Draws: 1216
Checkpoint saved at episode 7000
Episode: 7100, Recent Win Rate: 3.48, Epsilon: 0.284, Smartness: 0.31
Wins: 3491, Losses: 2368, Draws: 1242
Episode: 7200, Recent Win Rate: 3.51, Epsilon: 0.267, Smartness: 0.31
Wins: 3518, Losses: 2418, Draws: 1265
Episode: 7300, Recent Win Rate: 3.55, Epsilon: 0.251, Smartness: 0.31
Wins: 3557, Losses: 2458, Draws: 1286
Episode: 7400, Recent Win Rate: 3.60, Epsilon: 0.237, Smartness: 0.31
Wins: 3605, Losses: 2495, Draws: 1301
Episode: 7500, Recent Win Rate: 3.64, Epsilon: 0.223, Smartness: 0.31
Wins: 3644, Losses: 2539, Draws: 1318
Episode: 7600, Recent Win Rate: 3.68, Epsilon: 0.210, Smartness: 0.31
Wins: 3682, Losses: 2581, Draws: 1338
Episode: 7700, Recent Win Rate: 3.71, Epsilon: 0.198, Smartness: 0.31
Wins: 3717, Losses: 2618, Draws: 1366
Episode: 7800, Recent Win Rate: 3.75, Epsilon: 0.186, Smartness: 0.31
Wins: 3756, Losses: 2658, Draws: 



Episode: 8000, Recent Win Rate: 3.82, Epsilon: 0.165, Smartness: 0.31
Wins: 3829, Losses: 2738, Draws: 1434
Checkpoint saved at episode 8000
Episode: 8100, Recent Win Rate: 3.87, Epsilon: 0.156, Smartness: 0.31
Wins: 3873, Losses: 2769, Draws: 1459
Episode: 8200, Recent Win Rate: 3.91, Epsilon: 0.147, Smartness: 0.31
Wins: 3913, Losses: 2802, Draws: 1486
Episode: 8300, Recent Win Rate: 3.96, Epsilon: 0.138, Smartness: 0.31
Wins: 3964, Losses: 2827, Draws: 1510
Episode: 8400, Recent Win Rate: 3.99, Epsilon: 0.130, Smartness: 0.31
Wins: 4001, Losses: 2859, Draws: 1541
Episode: 8500, Recent Win Rate: 4.02, Epsilon: 0.122, Smartness: 0.31
Wins: 4032, Losses: 2895, Draws: 1574
Episode: 8600, Recent Win Rate: 4.07, Epsilon: 0.115, Smartness: 0.31
Wins: 4074, Losses: 2928, Draws: 1599
Episode: 8700, Recent Win Rate: 4.10, Epsilon: 0.109, Smartness: 0.31
Wins: 4109, Losses: 2968, Draws: 1624
Episode: 8800, Recent Win Rate: 4.14, Epsilon: 0.102, Smartness: 0.31
Wins: 4148, Losses: 3007, Draws: 



Episode: 9000, Recent Win Rate: 4.21, Epsilon: 0.091, Smartness: 0.31
Wins: 4222, Losses: 3080, Draws: 1699
Checkpoint saved at episode 9000
Episode: 9100, Recent Win Rate: 4.26, Epsilon: 0.085, Smartness: 0.31
Wins: 4268, Losses: 3108, Draws: 1725
Episode: 9200, Recent Win Rate: 4.31, Epsilon: 0.080, Smartness: 0.31
Wins: 4315, Losses: 3134, Draws: 1752
Episode: 9300, Recent Win Rate: 4.35, Epsilon: 0.076, Smartness: 0.31
Wins: 4355, Losses: 3163, Draws: 1783
Episode: 9400, Recent Win Rate: 4.39, Epsilon: 0.071, Smartness: 0.31
Wins: 4394, Losses: 3198, Draws: 1809
Episode: 9500, Recent Win Rate: 4.42, Epsilon: 0.067, Smartness: 0.31
Wins: 4427, Losses: 3236, Draws: 1838
Episode: 9600, Recent Win Rate: 4.45, Epsilon: 0.063, Smartness: 0.31
Wins: 4462, Losses: 3273, Draws: 1866
Episode: 9700, Recent Win Rate: 4.50, Epsilon: 0.060, Smartness: 0.31
Wins: 4506, Losses: 3302, Draws: 1893
Episode: 9800, Recent Win Rate: 4.53, Epsilon: 0.056, Smartness: 0.31
Wins: 4543, Losses: 3342, Draws: 



Resetting epsilon to 1.0
Episode: 10000, Recent Win Rate: 4.61, Epsilon: 1.000, Smartness: 0.62
Wins: 4615, Losses: 3403, Draws: 1983
Checkpoint saved at episode 10000
Episode: 10100, Recent Win Rate: 4.62, Epsilon: 0.942, Smartness: 0.62
Wins: 4626, Losses: 3477, Draws: 1998
Episode: 10200, Recent Win Rate: 4.63, Epsilon: 0.887, Smartness: 0.62
Wins: 4639, Losses: 3552, Draws: 2010
Episode: 10300, Recent Win Rate: 4.64, Epsilon: 0.835, Smartness: 0.62
Wins: 4648, Losses: 3632, Draws: 2021
Episode: 10400, Recent Win Rate: 4.65, Epsilon: 0.787, Smartness: 0.62
Wins: 4661, Losses: 3705, Draws: 2035
Episode: 10500, Recent Win Rate: 4.67, Epsilon: 0.741, Smartness: 0.62
Wins: 4674, Losses: 3777, Draws: 2050
Episode: 10600, Recent Win Rate: 4.68, Epsilon: 0.698, Smartness: 0.62
Wins: 4692, Losses: 3839, Draws: 2070
Episode: 10700, Recent Win Rate: 4.70, Epsilon: 0.657, Smartness: 0.62
Wins: 4709, Losses: 3905, Draws: 2087
Episode: 10800, Recent Win Rate: 4.72, Epsilon: 0.619, Smartness: 0.6



Episode: 11000, Recent Win Rate: 4.75, Epsilon: 0.549, Smartness: 0.62
Wins: 4763, Losses: 4093, Draws: 2145
Checkpoint saved at episode 11000
Episode: 11100, Recent Win Rate: 4.78, Epsilon: 0.517, Smartness: 0.62
Wins: 4785, Losses: 4156, Draws: 2160
Episode: 11200, Recent Win Rate: 4.80, Epsilon: 0.487, Smartness: 0.62
Wins: 4808, Losses: 4211, Draws: 2182
Episode: 11300, Recent Win Rate: 4.82, Epsilon: 0.458, Smartness: 0.62
Wins: 4829, Losses: 4269, Draws: 2203
Episode: 11400, Recent Win Rate: 4.84, Epsilon: 0.432, Smartness: 0.62
Wins: 4849, Losses: 4325, Draws: 2227
Episode: 11500, Recent Win Rate: 4.86, Epsilon: 0.406, Smartness: 0.62
Wins: 4869, Losses: 4382, Draws: 2250
Episode: 11600, Recent Win Rate: 4.88, Epsilon: 0.383, Smartness: 0.62
Wins: 4891, Losses: 4435, Draws: 2275
Episode: 11700, Recent Win Rate: 4.90, Epsilon: 0.360, Smartness: 0.62
Wins: 4909, Losses: 4491, Draws: 2301
Episode: 11800, Recent Win Rate: 4.92, Epsilon: 0.339, Smartness: 0.62
Wins: 4926, Losses: 455



Episode: 12000, Recent Win Rate: 4.95, Epsilon: 0.301, Smartness: 0.62
Wins: 4961, Losses: 4662, Draws: 2378
Checkpoint saved at episode 12000
Episode: 12100, Recent Win Rate: 4.97, Epsilon: 0.284, Smartness: 0.62
Wins: 4978, Losses: 4716, Draws: 2407
Episode: 12200, Recent Win Rate: 4.99, Epsilon: 0.267, Smartness: 0.62
Wins: 4998, Losses: 4775, Draws: 2428
Episode: 12300, Recent Win Rate: 5.01, Epsilon: 0.251, Smartness: 0.62
Wins: 5019, Losses: 4826, Draws: 2456
Episode: 12400, Recent Win Rate: 5.03, Epsilon: 0.237, Smartness: 0.62
Wins: 5037, Losses: 4885, Draws: 2479
Episode: 12500, Recent Win Rate: 5.06, Epsilon: 0.223, Smartness: 0.62
Wins: 5066, Losses: 4932, Draws: 2503
Episode: 12600, Recent Win Rate: 5.07, Epsilon: 0.210, Smartness: 0.62
Wins: 5085, Losses: 4979, Draws: 2537
Episode: 12700, Recent Win Rate: 5.09, Epsilon: 0.198, Smartness: 0.62
Wins: 5102, Losses: 5036, Draws: 2563
Episode: 12800, Recent Win Rate: 5.11, Epsilon: 0.186, Smartness: 0.62
Wins: 5125, Losses: 508



Episode: 13000, Recent Win Rate: 5.15, Epsilon: 0.165, Smartness: 0.62
Wins: 5164, Losses: 5186, Draws: 2651
Checkpoint saved at episode 13000
Episode: 13100, Recent Win Rate: 5.18, Epsilon: 0.156, Smartness: 0.62
Wins: 5193, Losses: 5233, Draws: 2675
Episode: 13200, Recent Win Rate: 5.20, Epsilon: 0.147, Smartness: 0.62
Wins: 5214, Losses: 5279, Draws: 2708
Episode: 13300, Recent Win Rate: 5.22, Epsilon: 0.138, Smartness: 0.62
Wins: 5233, Losses: 5329, Draws: 2739
Episode: 13400, Recent Win Rate: 5.25, Epsilon: 0.130, Smartness: 0.62
Wins: 5259, Losses: 5376, Draws: 2766
Episode: 13500, Recent Win Rate: 5.27, Epsilon: 0.122, Smartness: 0.62
Wins: 5280, Losses: 5420, Draws: 2801
Episode: 13600, Recent Win Rate: 5.30, Epsilon: 0.115, Smartness: 0.62
Wins: 5309, Losses: 5462, Draws: 2830
Episode: 13700, Recent Win Rate: 5.33, Epsilon: 0.109, Smartness: 0.62
Wins: 5336, Losses: 5505, Draws: 2860
Episode: 13800, Recent Win Rate: 5.35, Epsilon: 0.102, Smartness: 0.62
Wins: 5362, Losses: 554



Episode: 14000, Recent Win Rate: 5.40, Epsilon: 0.091, Smartness: 0.62
Wins: 5412, Losses: 5641, Draws: 2948
Checkpoint saved at episode 14000
Episode: 14100, Recent Win Rate: 5.44, Epsilon: 0.085, Smartness: 0.62
Wins: 5448, Losses: 5681, Draws: 2972
Episode: 14200, Recent Win Rate: 5.47, Epsilon: 0.080, Smartness: 0.62
Wins: 5482, Losses: 5722, Draws: 2997
Episode: 14300, Recent Win Rate: 5.50, Epsilon: 0.076, Smartness: 0.62
Wins: 5508, Losses: 5770, Draws: 3023
Episode: 14400, Recent Win Rate: 5.52, Epsilon: 0.071, Smartness: 0.62
Wins: 5532, Losses: 5809, Draws: 3060
Episode: 14500, Recent Win Rate: 5.55, Epsilon: 0.067, Smartness: 0.62
Wins: 5563, Losses: 5851, Draws: 3087
Episode: 14600, Recent Win Rate: 5.58, Epsilon: 0.063, Smartness: 0.62
Wins: 5591, Losses: 5891, Draws: 3119
Episode: 14700, Recent Win Rate: 5.61, Epsilon: 0.060, Smartness: 0.62
Wins: 5620, Losses: 5929, Draws: 3152
Episode: 14800, Recent Win Rate: 5.64, Epsilon: 0.056, Smartness: 0.62
Wins: 5650, Losses: 596



Resetting epsilon to 1.0
Episode: 15000, Recent Win Rate: 5.69, Epsilon: 1.000, Smartness: 0.80
Wins: 5703, Losses: 6029, Draws: 3269
Checkpoint saved at episode 15000
Episode: 15100, Recent Win Rate: 5.69, Epsilon: 0.942, Smartness: 0.80
Wins: 5703, Losses: 6117, Draws: 3281
Episode: 15200, Recent Win Rate: 5.70, Epsilon: 0.887, Smartness: 0.80
Wins: 5707, Losses: 6201, Draws: 3293
Episode: 15300, Recent Win Rate: 5.70, Epsilon: 0.835, Smartness: 0.80
Wins: 5712, Losses: 6285, Draws: 3304
Episode: 15400, Recent Win Rate: 5.71, Epsilon: 0.787, Smartness: 0.80
Wins: 5716, Losses: 6366, Draws: 3319
Episode: 15500, Recent Win Rate: 5.72, Epsilon: 0.741, Smartness: 0.80
Wins: 5730, Losses: 6437, Draws: 3334
Episode: 15600, Recent Win Rate: 5.73, Epsilon: 0.698, Smartness: 0.80
Wins: 5736, Losses: 6516, Draws: 3349
Episode: 15700, Recent Win Rate: 5.74, Epsilon: 0.657, Smartness: 0.80
Wins: 5751, Losses: 6586, Draws: 3364
Episode: 15800, Recent Win Rate: 5.75, Epsilon: 0.619, Smartness: 0.8



Episode: 16000, Recent Win Rate: 5.76, Epsilon: 0.549, Smartness: 0.80
Wins: 5774, Losses: 6815, Draws: 3412
Checkpoint saved at episode 16000
Episode: 16100, Recent Win Rate: 5.77, Epsilon: 0.517, Smartness: 0.80
Wins: 5778, Losses: 6891, Draws: 3432
Episode: 16200, Recent Win Rate: 5.78, Epsilon: 0.487, Smartness: 0.80
Wins: 5791, Losses: 6956, Draws: 3454
Episode: 16300, Recent Win Rate: 5.80, Epsilon: 0.458, Smartness: 0.80
Wins: 5807, Losses: 7017, Draws: 3477
Episode: 16400, Recent Win Rate: 5.81, Epsilon: 0.432, Smartness: 0.80
Wins: 5819, Losses: 7069, Draws: 3513
Episode: 16500, Recent Win Rate: 5.82, Epsilon: 0.406, Smartness: 0.80
Wins: 5831, Losses: 7126, Draws: 3544
Episode: 16600, Recent Win Rate: 5.83, Epsilon: 0.383, Smartness: 0.80
Wins: 5840, Losses: 7183, Draws: 3578
Episode: 16700, Recent Win Rate: 5.85, Epsilon: 0.360, Smartness: 0.80
Wins: 5858, Losses: 7237, Draws: 3606
Episode: 16800, Recent Win Rate: 5.86, Epsilon: 0.339, Smartness: 0.80
Wins: 5875, Losses: 728



Episode: 17000, Recent Win Rate: 5.89, Epsilon: 0.301, Smartness: 0.80
Wins: 5905, Losses: 7401, Draws: 3695
Checkpoint saved at episode 17000
Episode: 17100, Recent Win Rate: 5.91, Epsilon: 0.284, Smartness: 0.80
Wins: 5919, Losses: 7461, Draws: 3721
Episode: 17200, Recent Win Rate: 5.92, Epsilon: 0.267, Smartness: 0.80
Wins: 5934, Losses: 7517, Draws: 3750
Episode: 17300, Recent Win Rate: 5.94, Epsilon: 0.251, Smartness: 0.80
Wins: 5951, Losses: 7568, Draws: 3782
Episode: 17400, Recent Win Rate: 5.96, Epsilon: 0.237, Smartness: 0.80
Wins: 5968, Losses: 7615, Draws: 3818
Episode: 17500, Recent Win Rate: 5.97, Epsilon: 0.223, Smartness: 0.80
Wins: 5985, Losses: 7666, Draws: 3850
Episode: 17600, Recent Win Rate: 5.99, Epsilon: 0.210, Smartness: 0.80
Wins: 6004, Losses: 7712, Draws: 3885
Episode: 17700, Recent Win Rate: 6.00, Epsilon: 0.198, Smartness: 0.80
Wins: 6016, Losses: 7770, Draws: 3915
Episode: 17800, Recent Win Rate: 6.02, Epsilon: 0.186, Smartness: 0.80
Wins: 6034, Losses: 781



Episode: 18000, Recent Win Rate: 6.05, Epsilon: 0.165, Smartness: 0.80
Wins: 6064, Losses: 7910, Draws: 4027
Checkpoint saved at episode 18000
Episode: 18100, Recent Win Rate: 6.06, Epsilon: 0.156, Smartness: 0.80
Wins: 6076, Losses: 7957, Draws: 4068
Episode: 18200, Recent Win Rate: 6.08, Epsilon: 0.147, Smartness: 0.80
Wins: 6093, Losses: 7995, Draws: 4113
Episode: 18300, Recent Win Rate: 6.10, Epsilon: 0.138, Smartness: 0.80
Wins: 6111, Losses: 8041, Draws: 4149
Episode: 18400, Recent Win Rate: 6.12, Epsilon: 0.130, Smartness: 0.80
Wins: 6130, Losses: 8085, Draws: 4186
Episode: 18500, Recent Win Rate: 6.14, Epsilon: 0.122, Smartness: 0.80
Wins: 6152, Losses: 8124, Draws: 4225
Episode: 18600, Recent Win Rate: 6.16, Epsilon: 0.115, Smartness: 0.80
Wins: 6172, Losses: 8168, Draws: 4261
Episode: 18700, Recent Win Rate: 6.17, Epsilon: 0.109, Smartness: 0.80
Wins: 6187, Losses: 8202, Draws: 4312
Episode: 18800, Recent Win Rate: 6.19, Epsilon: 0.102, Smartness: 0.80
Wins: 6205, Losses: 824



Episode: 19000, Recent Win Rate: 6.22, Epsilon: 0.091, Smartness: 0.80
Wins: 6232, Losses: 8321, Draws: 4448
Checkpoint saved at episode 19000
Episode: 19100, Recent Win Rate: 6.23, Epsilon: 0.085, Smartness: 0.80
Wins: 6244, Losses: 8360, Draws: 4497
Episode: 19200, Recent Win Rate: 6.25, Epsilon: 0.080, Smartness: 0.80
Wins: 6261, Losses: 8404, Draws: 4536
Episode: 19300, Recent Win Rate: 6.27, Epsilon: 0.076, Smartness: 0.80
Wins: 6285, Losses: 8439, Draws: 4577
Episode: 19400, Recent Win Rate: 6.29, Epsilon: 0.071, Smartness: 0.80
Wins: 6302, Losses: 8477, Draws: 4622
Episode: 19500, Recent Win Rate: 6.32, Epsilon: 0.067, Smartness: 0.80
Wins: 6328, Losses: 8514, Draws: 4659
Episode: 19600, Recent Win Rate: 6.33, Epsilon: 0.063, Smartness: 0.80
Wins: 6345, Losses: 8551, Draws: 4705
Episode: 19700, Recent Win Rate: 6.35, Epsilon: 0.060, Smartness: 0.80
Wins: 6367, Losses: 8582, Draws: 4752
Episode: 19800, Recent Win Rate: 6.38, Epsilon: 0.056, Smartness: 0.80
Wins: 6388, Losses: 861



Final model saved as modle16_final_final.h5
