In this tutorial, we introduce the closed-loop data tools, including the data engine and the data-driven planning tools.

Before reading this, we recommend working through interface.ipynb first.

This tutorial is based on spider-python==0.1.16.4.

If you are using a different version, the code may not work as expected. 

# 1. How to collect data?

In [1]:
import spider
from spider.interface.BaseBenchmark import DummyBenchmark
from spider.planner_zoo import LatticePlanner
from spider.data.DataBuffer import LogBuffer

%matplotlib

# Collect planner logs: run a benchmark with a LogBuffer attached to the
# planner. Recording is switched on by log_buffer.apply_to(planner) below.
benchmark = DummyBenchmark({
    "snapshot": False,
    "map_frequency": 1,  # record map data
    # "racetrack": "straight",
})

planner = LatticePlanner({
    "steps": 20,
    "dt": 0.2,
    "print_info": False
})

# Records are written under data_root; with autosave_max_intervals=100 the
# run below reports one saved segment per 100 log records.
log_buffer = LogBuffer(
    autosave_max_intervals=100,
    file_format=spider.DATA_FORMAT_RAW,
    # file_format=spider.DATA_FORMAT_JSON,
    data_root='./dataset/'
)

log_buffer.apply_to(planner)

# Always release the buffer, even if an episode raises or the cell is
# interrupted, so the planner is not left with a dangling listener.
# NOTE(review): presumably release() also finalizes pending records -- confirm.
try:
    for episode in range(5):
        benchmark.test(planner)
finally:
    log_buffer.release()

Cython module not found. Use pyximport temporally
Using matplotlib backend: <object object at 0x000001EFFE9FACE0>
LogBuffer: Log Buffer is listening to the planner.
LogBuffer: All data will be automatically recorded...
A segment containing 100 log records has been saved to ./dataset/log_segment0000
A segment containing 100 log records has been saved to ./dataset/log_segment0001
A segment containing 100 log records has been saved to ./dataset/log_segment0002
A segment containing 100 log records has been saved to ./dataset/log_segment0003


# 2. How to train an imitation learning (IL) planner?

In [None]:

from spider.data.Dataset import OfflineLogDataset
from spider.planner_zoo.MlpPlanner import MlpPlanner


# Switches for this cell (truthy/falsy ints):
#   train                 -- fit the policy on the offline dataset and save it
#                            before the weights are loaded below
#   test_mode_closed_loop -- evaluate with DummyBenchmark when set, otherwise
#                            replay the planner on the recorded dataset
train = 0
test_mode_closed_loop = 1

# Planner configuration, kept in one place so it is easy to tweak.
mlp_config = {
    "steps": 20,
    "dt": 0.2,
    "num_object": 5,
    "normalize": False,
    "relative": False,
    "longitudinal_range": (-50, 100),
    "lateral_range": (-20,20),

    "learning_rate": 0.0001,
    "enable_tensorboard": True,
    "tensorboard_root": './tensorboard/'
}
planner = MlpPlanner(mlp_config)

# Offline dataset built from the logs in ./dataset/, using the planner's own
# state/action encoders.
dataset = OfflineLogDataset(
    './dataset/',
    planner.state_encoder,
    planner.action_encoder,
)
train_loader = dataset.get_dataloader(batch_size=64, shuffle=True)

# Optional training pass; the fitted weights are written to mlp.pth.
# NOTE(review): the first argument (100) is presumably the epoch count -- confirm.
if train:
    planner.policy.learn_dataset(100, train_loader=train_loader)
    planner.save_state_dict('mlp.pth')

# Weights are always (re)loaded from disk, so mlp.pth must already exist
# when training is skipped.
planner.load_state_dict('mlp.pth')

# Evaluation: open-loop replay on the dataset, or a closed-loop benchmark run.
if not test_mode_closed_loop:
    dataset.replay(planner, 0, recording=True)
else:
    from spider.interface.BaseBenchmark import DummyBenchmark
    benchmark = DummyBenchmark({
        "save_video": True,
    })
    benchmark.test(planner)


# 3. How to train a reinforcement learning (RL) planner?

In [None]:
import spider.visualize as vis
import tqdm
from spider.interface import DummyInterface, DummyBenchmark
from spider.planner_zoo.DQNPlanner import DQNPlanner
from spider.rl.reward.TrajectoryReward import TrajectoryReward

class Trainner:
    '''
    Step-based online training loop that drives a planner in an environment.

    The collaborators are duck-typed; the loop relies on:
      * env_interface: reset(), conduct_trajectory(plan), wrap_observation()
      * reward_function: evaluate_log(obs, plan, next_obs) -> (reward, done)

    TODO: later add a feature that wraps the environment as a gym environment.
    '''
    def __init__(self, env_interface, reward_function, visualize=False):
        """Store the environment, the reward function and the drawing switch.

        :param env_interface: environment the planner is rolled out in.
        :param reward_function: scores each (obs, plan, next_obs) transition.
        :param visualize: when true, every training step is drawn via ``vis``.
        """
        self.env_interface = env_interface
        self.reward_function = reward_function
        self._visualize = visualize


    def train(self, planner, train_steps, batch_size=64):
        """Run ``train_steps`` interaction steps, learning one batch per step.

        Exploration is enabled on ``planner.policy`` for the duration of the
        loop and is always switched off again afterwards, even when a step
        raises or the run is interrupted, so that a later evaluation does not
        silently run with exploration still enabled.

        :param planner: object exposing ``policy``, ``exp_buffer`` and
            ``plan(*obs)``.
        :param train_steps: total number of environment steps to run.
        :param batch_size: number of samples drawn from ``exp_buffer`` per
            learning update.
        """
        # TODO: should training be triggered per step or per episode?
        #  And is one update per round the right number? See stable baselines3.

        policy = planner.policy
        exp_buffer = planner.exp_buffer

        exp_buffer.apply_to(policy, self.reward_function)  # start listening

        # done=True forces an environment reset on the first iteration.
        obs, done = None, True

        policy.set_exploration(enable=True)

        try:
            for i in tqdm.tqdm(range(train_steps)):
                if done:
                    obs = self.env_interface.reset()

                # forward
                plan = planner.plan(*obs)  # the listening exp_buffer records obs, plan
                self.env_interface.conduct_trajectory(plan)
                obs2 = self.env_interface.wrap_observation()

                # feedback
                reward, done = self.reward_function.evaluate_log(obs, plan, obs2)  # the listening exp_buffer records reward, done
                policy.try_write_reward(reward, done, i)

                # learn
                batched_data = exp_buffer.sample(batch_size)
                policy.learn_batch(*batched_data)

                # visualize
                if self._visualize:
                    vis.cla()
                    vis.lazy_draw(*obs, plan)
                    vis.title(f"Step {i}, Reward {reward}")
                    vis.pause(0.001)

                obs = obs2
        finally:
            # Restore the non-exploring policy whether or not training finished.
            policy.set_exploration(enable=False)


In [None]:

# Ego vehicle footprint as (length, width); shared by the reward function
# and the planner configuration below.
ego_size = (5.,2.)
ego_length, ego_width = ego_size

# Environment the DQN planner is trained in.
env_interface = DummyInterface()

# Reward function.
# NOTE(review): the four tuples look like (min, max) ranges for the drivable
# area and the goal region -- confirm against TrajectoryReward's signature.
reward_function = TrajectoryReward(
    (-10., 280.),
    (-15, 15),
    (240., 280.),
    (-10,10),
    ego_size,
)

# DQN planner, sized to the same ego vehicle.
dqn_config = {
    "ego_veh_width": ego_width,
    "ego_veh_length": ego_length,
    "enable_tensorboard": True,
}
planner_dqn = DQNPlanner(dqn_config)

# Train for 50000 steps with a batch size of 64.
planner_school = Trainner(env_interface, reward_function, visualize=False)
planner_school.train(planner_dqn, 50000, 64)

# Persist the trained Q-network, then reload it from disk before evaluating.
q_net_path = './q_net.pth'
planner_dqn.policy.save_model(q_net_path)
planner_dqn.policy.load_model(q_net_path)

# Closed-loop evaluation with video recording enabled.
DummyBenchmark({"save_video": True}).test(planner_dqn)