**1. Import Dependencies**

In [None]:
import os
import numpy as np
import pandas as pd
import time

In [None]:
import gym
from stable_baselines3 import PPO      #PPO -> Proximal Policy Optimization
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy  #to evaluate the model 
from stable_baselines3.common.callbacks import EvalCallback

In [None]:
# Start a hidden virtual display (presumably so that environment rendering
# works on a machine without a physical screen — confirm for your setup).
from pyvirtualdisplay import Display
# NOTE(review): the name `display` is rebound further down in this cell by
# `from IPython import display`, so this Display handle is not reachable
# afterwards (e.g. to stop the virtual display).
display = Display(visible=0, size=(1024, 768))
display.start()


from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    """Build a matplotlib animation that plays ``frames`` at ``fps`` frames per second.

    Args:
        frames: Non-empty, indexable sequence of image arrays accepted by
            ``plt.imshow``. The figure is sized from ``frames[0].shape``.
        dpi: Figure resolution; the figure size in inches is the first
            frame's pixel size divided by this value.
        fps: Playback rate in frames per second. Must be positive.

    Returns:
        The ``animation.FuncAnimation`` that steps through ``frames``.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive, got {!r}".format(fps))

    height, width = frames[0].shape[:2]
    fig = plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    patch = plt.imshow(frames[0])

    def setup():
        # Hide the axes so only the frame image is shown.
        plt.axis('off')

    def animate(i):
        # Swap the image data in place instead of redrawing the whole figure.
        patch.set_data(frames[i])

    # FuncAnimation's `interval` is the delay between frames in milliseconds,
    # not a frame rate, so convert: passing `fps` directly would make a higher
    # fps play *slower*.
    return animation.FuncAnimation(
        fig, animate, init_func=setup, frames=len(frames), interval=1000.0 / fps
    )

def display_anim(frames, dpi=72, fps=60):
    """Return the animation of ``frames`` as a JavaScript/HTML string.

    Args:
        frames: Sequence of image arrays, forwarded to ``create_anim``.
        dpi: Figure resolution forwarded to ``create_anim``.
        fps: Frame-rate argument forwarded to ``create_anim``.

    Returns:
        The result of ``to_jshtml()`` on the created animation.
    """
    return create_anim(frames, dpi, fps).to_jshtml()

def save_anim(frames, filename, dpi=72, fps=50):
    """Render ``frames`` as an animation and write it to ``filename``.

    Args:
        frames: Sequence of image arrays, forwarded to ``create_anim``.
        filename: Output path handed to the animation's ``save`` method.
        dpi: Figure resolution forwarded to ``create_anim``.
        fps: Frame rate of the saved file, in frames per second.
    """
    anim = create_anim(frames, dpi, fps)
    # Pass the frame rate explicitly: without it, save() derives the rate from
    # the animation's frame interval rather than from the `fps` argument.
    anim.save(filename, fps=fps)


class trigger:
    """Callable switch that returns a stored value regardless of its argument.

    The stored value starts as ``True`` and can be replaced with ``set``.
    Presumably meant as a per-episode predicate (e.g. to turn recording on
    and off); it is not used in the visible part of this notebook.
    """

    def __init__(self):
        # Switched on by default.
        self.set(True)

    def set(self, t):
        """Store ``t`` as the value returned by subsequent calls."""
        self._trigger = t

    def __call__(self, e):
        """Return the stored value; the argument ``e`` is ignored."""
        return self._trigger

**2. Train Environment**

In [None]:
# Default environment
# environment_name = 'CarRacing-v0'    
# env = gym.make(environment_name)
# eval_env= env

# Curriculum environment
# from gym.wrappers.time_limit import TimeLimit
# from TurnRates.car_racing_curriculum import CarRacingCurriculum
# env = TimeLimit(CarRacingCurriculum(),max_episode_steps=1000)

# Evaluation environment
# from gym.wrappers.time_limit import TimeLimit
# from TurnRates.car_racing_eval import CarRacingEval
# eval_env = TimeLimit(CarRacingEval(),max_episode_steps=1000)

# Default obstacles environment
# from gym.wrappers.time_limit import TimeLimit
# from car_racing_obstacles import CarRacingObstacles
# env = TimeLimit(CarRacingObstacles(),max_episode_steps=1000)
# eval_env= env

# Curriculum obstacles environment
# from gym.wrappers.time_limit import TimeLimit
# from Obstacles.car_racing_obstacles_curriculum import CarRacingObstaclesCurriculum
# env = TimeLimit(CarRacingObstaclesCurriculum(),max_episode_steps=1000)

# Obstacles Evaluation environment
# from gym.wrappers.time_limit import TimeLimit
# from Obstacles.car_racing_obstacles_eval import CarRacingObstaclesEval
# eval_env = TimeLimit(CarRacingObstaclesEval(),max_episode_steps=1000)

# Curriculum both environment
# from gym.wrappers.time_limit import TimeLimit
# from Both.car_racing_obstacles_curriculum_both import CarRacingObstaclesCurriculumBoth
# env = TimeLimit(CarRacingObstaclesCurriculumBoth(),max_episode_steps=1000)

# Evaluation both environment
# from gym.wrappers.time_limit import TimeLimit
# from Both.car_racing_obstacles_eval_both import CarRacingObstaclesEvalBoth
# eval_env = TimeLimit(CarRacingObstaclesEvalBoth(),max_episode_steps=1000)


**3. Training Model**

In [None]:
# Random seed for the experiment.
# NOTE(review): confirm that the training cell below passes this value to PPO.
seed=0

In [None]:
# Directory passed to PPO as its TensorBoard log location.
log_path = './runs'

# Learning Rates: 
# 0.0005- PPO Default, 0.00025- PPO Curriculum
# 0.0005- PPO Obstacles, 0.000475- PPO Obstacles Curriculum
# 0.0002- PPO Both Curriculum

# Use the `seed` variable defined in the previous cell rather than repeating
# the literal, so changing the seed in one place really changes the run.
# `env` must have been created by un-commenting one of the environment
# options in the "Train Environment" cell.
model = PPO('CnnPolicy', env, learning_rate=0.000475, n_steps=1000, batch_size=1000,
            verbose=0, seed=seed, tensorboard_log=log_path)

# Folder handed to EvalCallback as `best_model_save_path`.
ppo_path = './Training/Saved_Models/PPO_car_best_Model_curriculum_obstacles1'

# Periodic evaluation on `eval_env` during training: 10 deterministic,
# non-rendered episodes per evaluation, with eval_freq=50000.
eval_callback = EvalCallback(eval_env=eval_env, best_model_save_path=ppo_path,
                             n_eval_episodes=10,
                             eval_freq=50000, verbose=1,
                             deterministic=True, render=False)
model.learn(total_timesteps=1000000, callback=eval_callback)

# Save the final model separately from the best checkpoint written by the callback.
ppo_path = './Training/Saved_Models/PPO_Model_final_curriculum_obstacles1.zip'
model.save(ppo_path)

**4. Evaluating Models** - Best Model

In [None]:
# Checkpoint to evaluate.
# NOTE(review): this points at the 'PPO_car_best_Model_curriculum_both' run,
# whereas the training cell above saves its best model under
# 'PPO_car_best_Model_curriculum_obstacles1' — make sure the path matches the
# run you intend to evaluate.
ppo_path = os.path.join('./Training/Saved_Models/PPO_car_best_Model_curriculum_both/best_model.zip')

In [None]:
# Load the saved PPO model from `ppo_path` and attach it to the evaluation environment.
best_model = PPO.load(ppo_path, env=eval_env)

In [None]:
# Evaluate the loaded policy over 500 episodes on eval_env without rendering.
# Per the Stable-Baselines3 docs, the default return is (mean_reward, std_reward).
evalue = evaluate_policy(best_model, eval_env, n_eval_episodes=500, render = False)
# NOTE(review): eval_env is closed here but used again by the metric loops in
# the following cells — confirm the environment tolerates reset() after close().
eval_env.close()
# Bare expression so the notebook shows the result as the cell output.
evalue

In [None]:
def check_if_car_on_grass(car):
    """
    Report whether the car has left the track entirely.

    A wheel counts as "on the grass" when it is not in contact with any tile
    (neither road nor obstacle). The car is considered to be on the grass only
    when exactly four of its wheels are in that state. A wheel that merely
    grazes the grass while still touching a tile does not count, which gives a
    "buffer region" around the road.

    Args:
        car (car_racing.Car): object exposing ``wheels``, each with a ``tiles``
            collection.
    Return:
        True if the car is on the grass, False otherwise.
    """
    # Count the wheels that touch no tile at all.
    wheels_off_tiles = sum(1 for wheel in car.wheels if len(wheel.tiles) == 0)
    return wheels_off_tiles == 4

In [None]:
# To calculate number of tiles covered, time taken and average proportion of time spent on grass in default environment
# All averages are taken over `episodes`, so changing the episode count below
# keeps the reported means correct.

episodes = 500
frames = []

# Running sums over all episodes; divided by `episodes` at the end.
grass_ratios = 0
tiles = 0
times = 0

for episode in range(1, episodes + 1):
    obs = eval_env.reset()  # state = env.reset()
    done = False
    score = 0
    total_road_or_obstacle_timesteps = 0
    total_grass_timesteps = 0

    info = {}

    while not done:

        # Classify the current step as "on grass" or "on road/obstacle"
        # before acting.
        if check_if_car_on_grass(eval_env.car):
            total_grass_timesteps += 1
        else:
            total_road_or_obstacle_timesteps += 1

        # frames.append(env.render(mode='rgb_array'))
        action, _ = best_model.predict(obs.copy())
        obs, reward, done, info = eval_env.step(action)
        score += reward

    # Per-episode values read from the environment once the episode is over.
    tiles += eval_env.tile_visited_count
    times += eval_env.t
    # The loop body runs at least once, so the denominator is never zero.
    grass_ratios += total_grass_timesteps / (total_grass_timesteps + total_road_or_obstacle_timesteps)

    print("Episode:{} Score:{}".format(episode, score))

print("Number of tiles:", tiles / episodes)
print("Time taken:", times / episodes)
print("Proportion of time spent on grass:", grass_ratios / episodes)
eval_env.close()

In [None]:
# To calculate number of tiles covered, time taken, collision/obstacle ratio and average proportion of time spent on grass in obstacle variant environment
# All averages are taken over `episodes`, so changing the episode count below
# keeps the reported means correct.

episodes = 500
frames = []

# Running sums over all episodes; divided by `episodes` at the end.
ratios = 0
grass_ratios = 0
tiles = 0
times = 0

for episode in range(1, episodes + 1):
    obs = eval_env.reset()  # state = env.reset()
    done = False
    score = 0
    total_road_or_obstacle_timesteps = 0
    total_grass_timesteps = 0

    info = {}

    while not done:

        # Classify the current step as "on grass" or "on road/obstacle"
        # before acting.
        if check_if_car_on_grass(eval_env.car):
            total_grass_timesteps += 1
        else:
            total_road_or_obstacle_timesteps += 1

        # frames.append(env.render(mode='rgb_array'))
        action, _ = best_model.predict(obs.copy())
        obs, reward, done, info = eval_env.step(action)
        score += reward

    # Per-episode values read from the environment once the episode is over.
    tiles += eval_env.tile_visited_count
    times += eval_env.t
    # `info` comes from the last step of the episode.
    # NOTE(review): this raises ZeroDivisionError if an episode reports
    # num_obstacles == 0 — confirm the environment always places obstacles.
    ratios += info['num_collisions'] / info['num_obstacles']
    # The loop body runs at least once, so the denominator is never zero.
    grass_ratios += total_grass_timesteps / (total_grass_timesteps + total_road_or_obstacle_timesteps)

    print("Episode:{} Score:{}".format(episode, score))

print("Number of tiles:", tiles / episodes)
print("Time taken:", times / episodes)
print("Ratio:", ratios / episodes)
print("Proportion of time spent on grass:", grass_ratios / episodes)
eval_env.close()