In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

from DataTracker import DataTracker
from env import CarEnv

In [None]:
# Hyperparameters shared by the PPO training cells below.
total_timesteps = 150_000  # original note: 300k
learning_rate = 5e-4  # original note: 0.004 (4*10^-3) recommended
ent_coef = 1e-2  # forwarded to PPO(ent_coef=...)
gamma = 0.99  # forwarded to PPO(gamma=...)
gae_lambda = 0.95  # forwarded to PPO(gae_lambda=...)
max_grad_norm = 0.5  # forwarded to PPO(max_grad_norm=...)

In [None]:
# First training run: build a fresh PPO agent on CarEnv, train it, and persist
# the model together with the tracked metrics.

# Create the metrics folder up front. DataFrame.to_csv does not create missing
# directories, and failing inside `finally` after a long run would lose the data.
Path("metrics/ppo").mkdir(parents=True, exist_ok=True)

env = Monitor(CarEnv())  # Monitor exposes the per-episode rewards/lengths read below
data_tracker = DataTracker()
model = PPO(
    "MlpPolicy",
    env,
    policy_kwargs=dict(net_arch=[256, 256]),
    learning_rate=learning_rate,
    ent_coef=ent_coef,
    gamma=gamma,
    gae_lambda=gae_lambda,
    max_grad_norm=max_grad_norm,
    tensorboard_log="ppo_logs",
)

try:
    # Take the step budget from the configuration cell rather than a
    # hard-coded literal, so the config is the single place to tune a run.
    model.learn(total_timesteps, callback=data_tracker)
except KeyboardInterrupt:
    print("Saving model due to KeyboardInterrupt")
finally:
    # Runs after normal completion, an interrupt, or an error, so whatever
    # progress was made is still written to disk.
    model.save("models/ppo_1")
    data_tracker.save("metrics/ppo/ppo_1.csv")

    # One row per finished episode, as recorded by the Monitor wrapper.
    episode_rewards = env.get_episode_rewards()
    episode_lengths = env.get_episode_lengths()
    ep_df = pd.DataFrame({"episode_rewards": episode_rewards, "episode_lengths": episode_lengths})
    ep_df.to_csv("metrics/ppo/ppo_1_episodes.csv")

In [None]:
# Watch the agent from the first training run drive in the environment.
# The first training cell saves to "models/ppo_1", so load that same checkpoint.
model = PPO.load("models/ppo_1")
env = CarEnv()

try:
    # Replay episodes back to back until the kernel is interrupted.
    while True:
        done = truncated = False
        obs, info = env.reset()
        while not (done or truncated):
            # deterministic=False: actions are sampled from the policy
            action, _states = model.predict(obs, deterministic=False)
            # Bind the fourth return value to `truncated` so an episode that is
            # cut short also ends the inner loop and triggers a reset.
            obs, reward, done, truncated, info = env.step(action)
            env.render()
except KeyboardInterrupt:
    print("Playback stopped by user")
finally:
    # Assumes CarEnv follows the Gymnasium Env API, which provides close().
    env.close()

## Second Iteration: continue training from the saved `ppo_1` model


In [None]:
# Second training run: reload the saved first-iteration model, keep training
# it on a fresh monitored environment, and save the result under a new name.
model_path = "models/ppo_1"
new_model_path = "models/ppo_2"

# Create the metrics folder up front. DataFrame.to_csv does not create missing
# directories, and failing inside `finally` after a long run would lose the data.
Path("metrics/ppo").mkdir(parents=True, exist_ok=True)

env = Monitor(CarEnv())  # Monitor exposes the per-episode rewards/lengths read below
data_tracker = DataTracker()
model = PPO.load(model_path, env=env)

try:
    # Take the step budget from the configuration cell rather than a
    # hard-coded literal, so the config is the single place to tune a run.
    model.learn(total_timesteps, callback=data_tracker)
except KeyboardInterrupt:
    print("Saving model due to KeyboardInterrupt")
finally:
    # Runs after normal completion, an interrupt, or an error, so whatever
    # progress was made is still written to disk.
    model.save(new_model_path)
    data_tracker.save("metrics/ppo/ppo_2.csv")

    # One row per finished episode, as recorded by the Monitor wrapper.
    episode_rewards = env.get_episode_rewards()
    episode_lengths = env.get_episode_lengths()
    ep_df = pd.DataFrame({"episode_rewards": episode_rewards, "episode_lengths": episode_lengths})
    ep_df.to_csv("metrics/ppo/ppo_2_episodes.csv")

In [None]:
# Watch the agent from the second training run drive in the environment.
model = PPO.load("models/ppo_2")
env = CarEnv()

try:
    # Replay episodes back to back until the kernel is interrupted.
    while True:
        done = truncated = False
        obs, info = env.reset()
        while not (done or truncated):
            # deterministic=True: ask the policy for its deterministic action
            action, _states = model.predict(obs, deterministic=True)
            # Bind the fourth return value to `truncated` so an episode that is
            # cut short also ends the inner loop and triggers a reset.
            obs, reward, done, truncated, info = env.step(action)
            env.render()
except KeyboardInterrupt:
    print("Playback stopped by user")
finally:
    # Assumes CarEnv follows the Gymnasium Env API, which provides close().
    env.close()