In [None]:
# %%
## 2.2 在迷宫中随机探索


In [None]:
# %%
# 导入使用的包
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# %%
# Draw the maze with the agent at its initial position.

# Create a 5x5-inch figure and keep handles to the figure and its axes.
fig, ax = plt.subplots(figsize=(5, 5))

# Red wall segments, each given as (x-coordinates, y-coordinates).
for wall_xs, wall_ys in (
        ([1, 1], [0, 1]),
        ([1, 2], [2, 2]),
        ([2, 2], [2, 1]),
        ([2, 3], [1, 1]),
):
    ax.plot(wall_xs, wall_ys, color='red', linewidth=2)

# Label the states S0-S8: three per row, left to right, top row first.
for state_idx in range(9):
    ax.text(state_idx % 3 + 0.5, 2.5 - state_idx // 3, 'S' + str(state_idx),
            size=14, ha='center')
ax.text(0.5, 2.3, 'START', ha='center')
ax.text(2.5, 0.3, 'GOAL', ha='center')

# Fix the drawing range to the 3x3 grid and hide every tick and tick label.
ax.set_xlim(0, 3)
ax.set_ylim(0, 3)
ax.tick_params(axis='both', which='both', bottom=False, top=False,
               labelbottom=False, right=False, left=False, labelleft=False)

# Green circle marking the current position, initially the centre of S0.
line, = ax.plot([0.5], [2.5], marker="o", color='g', markersize=60)


In [None]:
# %%
# Initial policy parameters theta_0.
#
# One row per state s0-s7; the columns are the moves up, right, down, left.
# A 1 marks a move that is listed for that state, np.nan a move that is not.
theta_0 = np.array([
    [np.nan, 1,      1,      np.nan],  # s0
    [np.nan, 1,      np.nan, 1],       # s1
    [np.nan, np.nan, 1,      1],       # s2
    [1,      1,      1,      np.nan],  # s3
    [np.nan, np.nan, 1,      1],       # s4
    [1,      np.nan, np.nan, np.nan],  # s5
    [1,      np.nan, np.nan, np.nan],  # s6
    [1,      1,      np.nan, np.nan],  # s7
])  # s8 is the goal, so it has no policy row


In [None]:
# %%
# Convert the policy parameters theta into the action policy pi.


def simple_convert_into_pi_from_theta(theta):
    '''Turn policy parameters into a policy by simple row-wise normalisation.

    Every row of ``theta`` is divided by the sum of its non-NaN entries, and
    any NaN remaining in the result is replaced by 0.

    Args:
        theta: 2-D array of policy parameters.

    Returns:
        Float array with the same shape as ``theta`` holding the normalised
        rows.
    '''
    n_rows, n_cols = theta.shape
    pi = np.zeros((n_rows, n_cols))
    for row_idx, row in enumerate(theta):
        # np.nansum ignores NaN entries when totalling the row.
        pi[row_idx] = row / np.nansum(row)

    # NaN becomes 0 (np.nan_to_num also maps infinities to finite values).
    return np.nan_to_num(pi)



In [None]:
# %%
# Compute the initial policy pi_0 from the initial parameters theta_0.
pi_0 = simple_convert_into_pi_from_theta(theta_0)



In [None]:
# %%
# Display the initial policy pi_0.
pi_0


In [None]:
# %%
# Compute the state reached after one step.


def get_next_s(pi, s):
    '''Sample a move from the policy and return the resulting state.

    Args:
        pi: 2-D array; row ``s`` holds the probabilities of moving up, right,
            down and left (in that order) from state ``s``.
        s: Index of the current state.

    Returns:
        Index of the state reached after the sampled move.
    '''
    direction = ["up", "right", "down", "left"]
    # Change in state index per move: up/down shift the index by 3,
    # right/left by 1.
    offsets = {"up": -3, "right": 1, "down": 3, "left": -1}

    # Pick one direction according to the probabilities in pi[s, :].
    next_direction = np.random.choice(direction, p=pi[s, :])

    return s + offsets[str(next_direction)]



In [None]:
# %%
# Move the agent through the maze until it reaches the goal.


def goal_maze(pi):
    '''Walk from state 0 under policy ``pi`` until state 8 is reached.

    Each step is drawn with ``get_next_s``. The loop has no other exit, so it
    only ends once state 8 is sampled.

    Args:
        pi: Policy array passed through to ``get_next_s``.

    Returns:
        List of visited states, beginning with 0 and ending with 8.
    '''
    current = 0  # start position
    visited = [current]  # every state the agent has occupied, in order

    while current != 8:  # 8 is the goal state
        current = get_next_s(pi, current)
        visited.append(current)

    return visited



In [None]:
# %%
# Run the agent through the maze under the initial policy pi_0 and keep the
# list of states it visited.
state_history = goal_maze(pi_0)



In [None]:
# %%
# Show the visited states, then the number of moves taken (one fewer than the
# number of recorded states, because the start state is recorded too).
print(state_history)
print("解开迷宫所需的步数是", len(state_history) - 1, "步", sep="")


In [None]:
# %%
# 可视化智能体的移动过程
# 参考URL http://louistiao.me/posts/notebooks/embedding-matplotlib-animations-in-jupyter-notebooks/
from matplotlib import animation
from IPython.display import HTML


def init():
    '''Initialise the background frame by clearing the marker's data.

    Returns:
        One-element tuple containing the marker artist ``line``.
    '''
    line.set_data([], [])
    artists = (line,)
    return artists


def animate(i):
    '''Draw frame ``i``: move the marker to the i-th recorded state.

    Args:
        i: Frame number, used as an index into ``state_history``.

    Returns:
        One-element tuple containing the marker artist ``line``.
    '''
    state = state_history[i]  # state occupied at this frame
    # Row is the quotient and column the remainder of the state index
    # divided by 3.
    row, col = divmod(state, 3)
    x = col + 0.5  # horizontal centre of the cell
    y = 2.5 - row  # vertical centre; row 0 is drawn at the top
    # set_data requires sequences; Matplotlib 3.7 and later raise an error on
    # bare scalars, so wrap the single point in one-element lists.
    line.set_data([x], [y])
    return (line,)


# Build the animation from the init function and the per-frame draw function:
# one frame per recorded state, 200 ms between frames, played once.
anim = animation.FuncAnimation(
    fig,
    animate,
    init_func=init,
    frames=len(state_history),
    interval=200,
    repeat=False,
)

# Render the animation as an interactive JavaScript/HTML widget in the notebook.
HTML(anim.to_jshtml())


In [None]:
# %%