# In [667]:
import tkinter as tk
import time
import numpy as np
import random
from collections import defaultdict

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-controlled random policy.

    Q-values live in a ``defaultdict`` keyed by ``tuple(state)``. Each entry
    is a NumPy vector holding one value per action and is created as zeros
    the first time the state is looked up.

    Args:
        actions: Sequence of hashable action identifiers. The position of an
            action in this sequence is its column in the Q-table.
        learning_rate: Step size applied to the temporal-difference error.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability that ``choose_action`` picks a uniformly random
            action. NOTE: in this class epsilon is the *exploration* rate,
            so the default of 0.9 acts randomly about 90% of the time.
    """

    def __init__(self, actions, learning_rate=0.1, gamma=0.9, epsilon=0.9):
        n_actions = len(actions)
        self.q_table = defaultdict(lambda: np.zeros(n_actions))
        self.lr = learning_rate
        self.gamma = gamma
        self.epsilon = epsilon
        self.actions = actions
        # Maps an action identifier to its column in the Q-table so that
        # action values and table indices are never confused.
        self._action_index = {
            action: index for index, action in enumerate(actions)
        }

    def choose_action(self, state):
        """Return an element of ``self.actions`` for ``state``.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest stored Q-value is returned
        (``np.argmax`` resolves ties in favour of the earliest action).
        """
        state_key = tuple(state)
        if np.random.uniform() < self.epsilon:    # explore
            index = np.random.randint(len(self.actions))
        else:                                     # exploit
            index = int(np.argmax(self.q_table[state_key]))
        return self.actions[index]

    def learn(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: State the action was taken in.
            action: The action taken; must be an element of ``self.actions``.
            reward: Reward received for the transition.
            next_state: State reached after the action.
            done: ``True`` when ``next_state`` is terminal. A terminal
                transition uses ``reward`` alone as the target instead of
                bootstrapping from ``next_state``. Defaults to ``False``,
                which reproduces the always-bootstrap update.

        Raises:
            KeyError: If ``action`` is not one of ``self.actions``.
        """
        state_key = tuple(state)
        column = self._action_index[action]
        q_predict = self.q_table[state_key][column]
        if done:
            # Nothing follows a terminal state, so there is no future value.
            q_target = reward
        else:
            next_key = tuple(next_state)
            q_target = reward + self.gamma * np.max(self.q_table[next_key])
        self.q_table[state_key][column] += self.lr * (q_target - q_predict)

# In [668]:
import tkinter as tk
import time
import numpy as np
import random
from collections import defaultdict

class Maze(tk.Tk):
    """Tkinter grid-world maze with one goal, several traps and one player.

    The player (red) always starts in the top-left cell ``(0, 0)``. The goal
    (yellow) is placed at random in the bottom-right quadrant and the traps
    (black) are placed at random on the remaining cells. A fresh random
    layout is generated every time ``_build_maze`` runs, which includes
    every call to ``reset`` and ``show_message``.

    Actions are encoded as ``0`` = up, ``1`` = down, ``2`` = right,
    ``3`` = left.
    """

    UNIT = 40  # pixel size of one grid cell
    MAZE_H = 6  # grid height in cells
    MAZE_W = 6  # grid width in cells
    HELL_COUNT = 5  # number of traps placed on the grid

    def __init__(self):
        super().__init__()
        # Created by _build_maze(); replaced on every rebuild.
        self.canvas = None
        self.title('Maze')
        # Tk geometry strings are "<width>x<height>".
        self.geometry(f'{self.MAZE_W * self.UNIT}x{self.MAZE_H * self.UNIT}')
        self._build_maze()
        self.bind_keys()  # enable arrow-key control

    def _draw_rect(self, x, y, color):
        """Draw a filled square inside grid cell ``(x, y)``; return its item ID."""
        center = self.UNIT / 2
        offset = center - 5  # leave a 5-pixel margin inside the cell
        x_center = x * self.UNIT + center
        y_center = y * self.UNIT + center
        return self.canvas.create_rectangle(
            x_center - offset, y_center - offset,
            x_center + offset, y_center + offset,
            fill=color, outline=''
        )

    def _build_maze(self):
        """Create the canvas and draw a new random maze layout.

        Any canvas left over from a previous build is destroyed first so
        that repeated rebuilds neither leak widgets nor pack the new maze
        underneath the old one, outside the fixed-size window.
        """
        if self.canvas is not None:
            self.canvas.destroy()

        width_px = self.MAZE_W * self.UNIT
        height_px = self.MAZE_H * self.UNIT
        self.canvas = tk.Canvas(self, bg='white',
                                height=height_px, width=width_px)

        # Grid lines
        for i in range(0, width_px, self.UNIT):
            self.canvas.create_line(i, 0, i, height_px)
        for i in range(0, height_px, self.UNIT):
            self.canvas.create_line(0, i, width_px, i)

        # Random goal in the bottom-right quadrant, never on the start cell
        goal_min_x = self.MAZE_W // 2
        goal_min_y = self.MAZE_H // 2
        possible_goal = [
            (x, y)
            for x in range(goal_min_x, self.MAZE_W)
            for y in range(goal_min_y, self.MAZE_H)
            if (x, y) != (0, 0)
        ]
        # Fall back to the whole grid when the quadrant offers no cell
        if not possible_goal:
            possible_goal = [(x, y)
                             for x in range(self.MAZE_W)
                             for y in range(self.MAZE_H)
                             if (x, y) != (0, 0)]
        self.goal_x, self.goal_y = random.choice(possible_goal)
        self.goal = self._draw_rect(self.goal_x, self.goal_y, 'yellow')

        # Random traps on any cell other than the start and the goal
        reserved = [(0, 0), (self.goal_x, self.goal_y)]
        free_cells = [(x, y)
                      for x in range(self.MAZE_W)
                      for y in range(self.MAZE_H)
                      if (x, y) not in reserved]
        # Cap the count so small grids do not make random.sample raise.
        trap_count = min(self.HELL_COUNT, len(free_cells))
        self.hell_coords = random.sample(free_cells, trap_count)
        self.hells = [self._draw_rect(x, y, 'black')
                      for x, y in self.hell_coords]

        # The player always starts in the top-left cell
        self.player = self._draw_rect(0, 0, 'red')

        self.canvas.pack()

    def bind_keys(self):
        """Bind the four arrow keys to ``move`` and take keyboard focus."""
        self.bind("<KeyPress-Up>", lambda _: self.move(0))
        self.bind("<KeyPress-Down>", lambda _: self.move(1))
        self.bind("<KeyPress-Right>", lambda _: self.move(2))
        self.bind("<KeyPress-Left>", lambda _: self.move(3))
        self.focus_set()  # make sure the window receives key events

    def move(self, action):
        """Move the player one cell and report the outcome.

        A move that would leave the grid keeps the player in place.

        Args:
            action: ``0`` up, ``1`` down, ``2`` right, ``3`` left.

        Returns:
            ``(state, reward, done)`` where ``state`` is the result of
            ``get_state`` after the move and ``reward``/``done`` come from
            ``check_game_state``.
        """
        x, y = self.get_grid_position()

        new_x, new_y = x, y
        if action == 0 and y > 0:
            new_y -= 1          # up
        elif action == 1 and y < self.MAZE_H - 1:
            new_y += 1          # down
        elif action == 2 and x < self.MAZE_W - 1:
            new_x += 1          # right
        elif action == 3 and x > 0:
            new_x -= 1          # left

        self.canvas.move(self.player,
                         (new_x - x) * self.UNIT,
                         (new_y - y) * self.UNIT)

        reward, done = self.check_game_state(new_x, new_y)
        return self.get_state(), reward, done

    def check_game_state(self, x, y):
        """Return ``(reward, done)`` for a player standing on cell ``(x, y)``.

        The goal yields ``(5, True)``, a trap yields ``(-10, True)`` and any
        other cell yields ``(-0.5, False)``. The board is not reset here.
        """
        if (x, y) == (self.goal_x, self.goal_y):
            return 5, True

        if (x, y) in self.hell_coords:
            return -10, True

        return -0.5, False

    def get_grid_position(self):
        """Return the player's current ``(x, y)`` grid cell."""
        coords = self.canvas.coords(self.player)
        x = int(coords[0] // self.UNIT)
        y = int(coords[1] // self.UNIT)
        return x, y

    def show_message(self, text):
        """Show ``text`` in the middle of the board for one second, then rebuild.

        The call blocks for the duration of the pause and ends by generating
        a new random maze.
        """
        self.canvas.create_text(self.MAZE_W * self.UNIT / 2,
                                self.MAZE_H * self.UNIT / 2,
                                text=text, font=('Arial', 32), fill='red')
        self.update()
        time.sleep(1)
        self.canvas.delete("all")
        self._build_maze()

    def get_state(self):
        """Return the board as a flat array of ``MAZE_H * MAZE_W`` floats.

        Cells are encoded as ``1`` player, ``2`` goal, ``-1`` trap and ``0``
        empty. Values are written in that order, so a player standing on the
        goal or on a trap is overwritten by that cell's marker.
        """
        x, y = self.get_grid_position()
        state = np.zeros((self.MAZE_H, self.MAZE_W))
        state[y, x] = 1  # player
        state[self.goal_y, self.goal_x] = 2  # goal
        for hell_x, hell_y in self.hell_coords:
            state[hell_y, hell_x] = -1  # trap
        return state.flatten()

    def reset(self):
        """Generate a new random maze and return its initial state."""
        self._build_maze()
        return self.get_state()



# In [669]:
class AutoMaze(Maze):
    """Maze that trains a ``QLearningAgent`` on itself inside the Tk loop.

    Training is driven by ``after`` callbacks, so the window stays
    responsive: each callback performs one agent step, and a finished
    episode schedules the next one until ``max_episodes`` is reached.
    """

    def __init__(self):
        super().__init__()
        self.agent = QLearningAgent(actions=[0, 1, 2, 3])
        self.training_data = []   # one summary dict per finished episode
        self.start_time = None    # wall-clock start of the current episode
        self.episode_count = 0
        self.max_episodes = 200
        self.delay = 10  # visualisation delay between steps (milliseconds)

        # Disable manual control. The arrow keys are bound individually by
        # Maze.bind_keys, so each of those bindings has to be removed; a
        # handler on the generic <KeyPress> event would not override them.
        for sequence in ("<KeyPress-Up>", "<KeyPress-Down>",
                         "<KeyPress-Right>", "<KeyPress-Left>"):
            self.unbind(sequence)
        self.start_training()

    def unbind(self, sequence, funcid=None):
        """Remove the bindings for ``sequence`` on this window.

        Delegates to the inherited tkinter ``unbind`` so the standard
        signature, including the optional ``funcid``, keeps working.
        """
        super().unbind(sequence, funcid)

    def start_training(self):
        """Schedule the first training episode on the Tk event loop."""
        self.after(0, self.run_episode)

    def run_episode(self):
        """Run one training episode, one agent step per ``after`` callback.

        Once ``max_episodes`` episodes have been started, prints the
        training summary instead of starting another episode.
        """
        if self.episode_count >= self.max_episodes:
            self.analyze_training_data()
            return

        state = self.reset()
        total_reward = 15  # every episode starts with a score of 15
        steps = 0
        self.start_time = time.time()
        self.episode_count += 1

        def step():
            nonlocal state, total_reward, steps
            action = self.agent.choose_action(state)
            next_state, reward, done = self.move(action)
            self.agent.learn(state, action, reward, next_state)
            total_reward += reward
            state = next_state
            steps += 1

            if not done:
                self.after(self.delay, step)
                return

            # Only reaching the goal ends an episode with a positive reward.
            # The accumulated score cannot be used here: it includes the
            # starting bonus, so an early fall into a trap stays positive.
            result = "Win" if reward > 0 else "Lose"
            episode_time = time.time() - self.start_time
            self.training_data.append({
                "episode": self.episode_count,
                "time": episode_time,
                "steps": steps,
                "reward": total_reward,
                "result": result
            })
            self.print_progress()
            self.after(0, self.run_episode)

        self.after(self.delay, step)

    def print_progress(self):
        """Print a one-line summary of the most recently finished episode."""
        data = self.training_data[-1]
        print(f"Episode: {data['episode']:3d} | "
              f"Time: {data['time']:5.2f}s | "
              f"Steps: {data['steps']:3d} | "
              f"Result: {data['result']} | "
              f"Reward: {data['reward']:6.1f} | "
              f"States: {len(self.agent.q_table)}")

    def analyze_training_data(self):
        """Print success rate, average time and average steps of all episodes."""
        print("\n===== Training Summary =====")
        if not self.training_data:
            # Avoid dividing by zero when no episode was ever recorded.
            print("No episodes were recorded.")
            return
        success = sum(1 for d in self.training_data if d["result"] == "Win")
        avg_time = np.mean([d["time"] for d in self.training_data])
        avg_step = np.mean([d["steps"] for d in self.training_data])
        print(f"Success Rate: {success/len(self.training_data)*100:.1f}%")
        print(f"Average Time: {avg_time:.2f}s")
        print(f"Average Step: {avg_step} per each")
        print(f"Explored States: {len(self.agent.q_table)}")

# Script entry point: build the self-training maze window and hand control to
# the Tk event loop. AutoMaze.__init__ schedules the first training episode,
# so training starts as soon as the loop begins processing events.
if __name__ == "__main__":
    env = AutoMaze()
    env.mainloop()

Episode:   1 | Time:  0.08s | Steps:   5 | Result: Win | Reward:    3.0 | States: 6
Episode:   2 | Time:  0.94s | Steps:  61 | Result: Lose | Reward:  -10.0 | States: 21
Episode:   3 | Time:  0.04s | Steps:   3 | Result: Win | Reward:    4.0 | States: 24
Episode:   4 | Time:  0.14s | Steps:   9 | Result: Win | Reward:    1.0 | States: 31
Episode:   5 | Time:  0.02s | Steps:   1 | Result: Win | Reward:    5.0 | States: 33
Episode:   6 | Time:  0.23s | Steps:  15 | Result: Lose | Reward:   -2.0 | States: 38
Episode:   7 | Time:  0.23s | Steps:  15 | Result: Lose | Reward:   -2.0 | States: 46
Episode:   8 | Time:  0.70s | Steps:  45 | Result: Lose | Reward:  -17.0 | States: 63
Episode:   9 | Time:  0.12s | Steps:   8 | Result: Win | Reward:    1.5 | States: 66
Episode:  10 | Time:  0.26s | Steps:  17 | Result: Lose | Reward:   -3.0 | States: 75
Episode:  11 | Time:  0.24s | Steps:  16 | Result: Lose | Reward:   -2.5 | States: 87
Episode:  12 | Time:  0.27s | Steps:  17 | Result: Lose | Re