In [3]:
import numpy as np
from smac.env import StarCraft2Env

In [52]:
# 创建地图为 3m, 难度为 3 的环境
env = StarCraft2Env(map_name="3m", difficulty="3")

'''
环境信息
state_shape    状态向量维度
obs_shape      观察向量维度
n_actions      可用行动数
n_agents       可用单位数
episode_limit  最大迭代次数
'''
env_info = env.get_env_info()
print(env_info)

{'state_shape': 48, 'obs_shape': 30, 'n_actions': 9, 'n_agents': 3, 'episode_limit': 60}


In [64]:
# 启动游戏环境
env.reset()
print("==== game start ===")

==== game start ===


In [24]:
'''
获取每个单位的观察向量(如果死了全为 0)
可能没有的项后加*
[part1 移动特征]
0/1/2/3: move n/s/e/w
[part2 敌方特征] x 3 (不在视野范围内时标 * 的全没有, 存在的全为 0)
0: 是否能攻击
1: 距离
2: 相对 X
3: 相对 Y
4: 百分比生命*
5: 百分比护盾*
6: 单位类型*
[part3 友军特征] x 2 (不在视野范围内时标 * 的全没有, 存在的全为 0)
0: 是否可见
1: 距离
2: 相对 X
3: 相对 Y
4: 百分比生命*
5: 百分比护盾*
6: 单位类型*
[part4 自身特征]
0: 百分比生命
1: 百分比护盾*
2: 单位类型
'''
obs = env.get_obs()
print(len(obs))     # = n_agents
print(len(obs[0]))  # = obs_shape (30 = 4 + (0123 = 4) * 3 + (012346 = 6) * 2 + (02 = 2))
print(obs[0])

3
30
[1.         1.         1.         1.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         1.         0.0764974  0.         0.0764974  1.
 1.         0.10818365 0.0764974  0.0764974  1.         1.        ]


In [25]:
# 获取全局状态
state = env.get_state()
print(len(state))  # = state_shape
print(state)

48
[ 1.          0.         -0.25       -0.02458845  1.          0.
 -0.25        0.          1.          0.         -0.22541155  0.
  1.          0.25        0.          1.          0.25       -0.02458845
  1.          0.27458844  0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.        ]


In [26]:
# agent_id 不能超过 n_agents
agent_id = 0
'''
可用的行动
0: no-op(活着的时候不能选择)
1: stop
2: move north
3: move south
4: move east
5: move west
6/7/8: can attack enemy 1/2/3
'''
avail_actions = env.get_avail_agent_actions(agent_id)
print(avail_actions)

[0, 1, 1, 1, 1, 1, 0, 0, 0]


In [54]:
# 让所有单位向东移动
actions = []
for i in range(n_agents):
    actions.append(4)
# step 接收一个全部单位行动的数组
reward, terminated, _ = env.step(actions)
print(reward)      # 奖赏(纯移动时为 0)
print(terminated)  # 是否结束
print(_)           # 是否胜利

0.0
False
{'battle_won': False}


In [65]:
def no_enemy_in_sight():
    for i in range(n_agents):
        a = env.get_avail_agent_actions(i)
        if (a[6] + a[7] + a[8] > 1):
            return False
    return True

# 持续移动直到双方相遇
while no_enemy_in_sight():
    actions = []
    for i in range(n_agents):
        actions.append(4)
    reward, terminated, _ = env.step(actions)

In [66]:
# 可以看到此时角色的观察向量已经发生改变
print(env.get_obs_agent(0))
# 也已经可以攻击敌方单位
print(env.get_avail_agent_actions(0))

[1.         1.         1.         1.         0.         0.699999
 0.6906738  0.11387804 1.         1.         0.61815816 0.6181098
 0.00773112 1.         1.         0.62583816 0.6201443  0.08422852
 1.         1.         0.0764974  0.         0.0764974  1.
 1.         0.10818365 0.0764974  0.0764974  0.73333335 1.        ]
[0, 1, 1, 1, 1, 1, 0, 1, 1]


In [68]:
# 你可以看到你的机枪兵集火敌方单位直到其死亡
# 虽然因为先被打的是你们, 所以只打死一两个敌人之后你们就死光光了(x
def all_dead():
    for i in range(n_agents):
        if (env.get_avail_agent_actions(i)[0] != 1):
            return False
    return True

while not all_dead():
    actions = []
    for i in range(n_agents):
        dead = env.get_avail_agent_actions(i)[0]
        if dead == 1:
            actions.append(0)
        else:
            hit = 0
            for j in [6, 7, 8]:
                if env.get_avail_agent_actions(i)[j] == 1:
                    actions.append(j)
                    hit = 1
                    break
            if hit == 0:
                actions.append(1)
    reward, terminated, _ = env.step(actions)
    # 战斗会产生奖励
    end_str = ''
    if not terminated:
        end_str += 'not '
    win_str = ''
    if terminated:
        win_str = ', you '
        if _['battle_won']:
            win_str += 'win'
        else:
            win_str += 'lose'
    print("battle %send%s: reward is %f" % (end_str, win_str, reward))

battle not end: reward is 0.986301
battle not end: reward is 0.000000
battle not end: reward is 0.986301
battle not end: reward is 0.657534
battle not end: reward is 0.328767
battle not end: reward is 1.041096
battle not end: reward is 0.000000
battle not end: reward is 0.657534
battle not end: reward is 0.000000
battle not end: reward is 0.657534
battle not end: reward is 0.000000
battle not end: reward is 0.712329
battle not end: reward is 0.328767
battle not end: reward is 0.328767
battle not end: reward is 0.328767
battle not end: reward is 0.000000
battle not end: reward is 0.328767
battle not end: reward is 0.000000
battle not end: reward is 0.328767
battle not end: reward is 0.000000
battle not end: reward is 0.328767
battle not end: reward is 0.328767
battle end, you lose: reward is 0.000000


In [63]:
# 关闭游戏环境
env.close()