In [4]:
# Reference material for this notebook:
# - CarRacing DQN example: https://github.com/andywu0913/OpenAI-GYM-CarRacing-DQN/tree/master
# - PyTorch DQN tutorial: https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

In [5]:
import argparse
import gym
from collections import deque
from tqdm import tqdm
import torch

from agent import Agent
from processing import process_state_image, generate_state_frame_stack_from_queue


### Variables

In [6]:
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device  # bare expression so the notebook displays the selected device

device(type='cuda')

In [7]:
# --- Run configuration -------------------------------------------------------
render = True             # NOTE(review): not read by the cells shown here
episodes = 50             # number of training episodes
batch_size = 32           # minibatch size handed to agent.replay
update_target_every = 4   # agent.update_target_model() runs every N-th episode

# --- Values forwarded to the Agent constructor -------------------------------
frame_stack_num = 4       # frames per stacked state; also sets the action-repeat count
memory_size = 10_000      # passed as Agent(memory_size=...)
gamma = 0.99              # presumably the discount factor; confirm in agent.py
epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.995  # presumably the exploration schedule
learning_rate = 1e-3      # passed as Agent(learning_rate=...)

In [8]:
# CarRacing with a discrete action space; frames are rendered as RGB arrays.
env = gym.make('CarRacing-v2', continuous=False, render_mode='rgb_array')

# Agent arguments, in order: action_space, frame_stack_num, memory_size, gamma,
# epsilon, epsilon_min, epsilon_decay, learning_rate.
agent = Agent(
    action_space=env.action_space,
    frame_stack_num=frame_stack_num,
    memory_size=memory_size,
    gamma=gamma,
    epsilon=epsilon,
    epsilon_min=epsilon_min,
    epsilon_decay=epsilon_decay,
    learning_rate=learning_rate,
)

In [9]:
# Training loop: run `episodes` episodes, store every transition in the agent's
# memory and call agent.replay after each decision once enough samples exist.

best_reward = float('-inf')

# Each chosen action is repeated for this many environment frames. Clamped to
# at least 1 so `next_state` and `done` are always assigned, even when
# frame_stack_num == 1.
frames_per_action = max(1, frame_stack_num - 1)

# The context manager closes the bar even if training raises or is interrupted.
with tqdm(total=episodes, desc="Episodes") as progress_bar:
    for e in range(episodes):
        # The environment is re-seeded with the same value at every reset.
        state, info = env.reset(seed=77)
        init_state = process_state_image(state)

        total_reward = 0
        negative_reward_counter = 0
        # Start with the first processed frame repeated to fill the whole stack.
        state_frame_stack_queue = deque(
            [init_state] * agent.frame_stack_num, maxlen=agent.frame_stack_num
        )
        time_frame_counter = 1

        while True:
            current_state_frame_stack = generate_state_frame_stack_from_queue(state_frame_stack_queue)
            action = agent.act(current_state_frame_stack)

            # Repeat the action and accumulate the reward of the skipped frames.
            reward = 0
            for _ in range(frames_per_action):
                next_state, r, terminated, truncated, info = env.step(action)
                done = bool(terminated or truncated)
                reward += r
                if done:
                    break

            # Length of the current run of negative-reward decisions; the run is
            # only counted after the first 100 decisions of the episode.
            if time_frame_counter > 100 and reward < 0:
                negative_reward_counter += 1
            else:
                negative_reward_counter = 0

            # Extra bonus for the model if it uses full gas.
            # actions = do nothing, steer left, steer right, gas, brake
            # NOTE(review): the scaling also enlarges negative rewards, so gas on
            # a negative-reward step is penalised more, not rewarded - confirm
            # that this is intended.
            if action == 3:
                reward *= 1.5

            total_reward += reward
            # Highest running episode total seen so far (updated on every
            # decision, not only at the end of an episode).
            if total_reward > best_reward:
                best_reward = total_reward

            next_state = process_state_image(next_state)
            state_frame_stack_queue.append(next_state)
            next_state_frame_stack = generate_state_frame_stack_from_queue(state_frame_stack_queue)

            agent.memorize(current_state_frame_stack, action, reward, next_state_frame_stack, done)

            # End the episode when the env is done, after 25 consecutive
            # negative-reward decisions, or once the running total drops below 0.
            if done or negative_reward_counter >= 25 or total_reward < 0:
                progress_bar.set_postfix({"Total reward": total_reward, "Best reward": best_reward})
                progress_bar.update(1)
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
            time_frame_counter += 1

        # Update the target model every `update_target_every` episodes.
        if e % update_target_every == 0:
            agent.update_target_model()

env.close()

Episodes: 100%|██████████| 50/50 [06:53<00:00,  8.28s/it, Total reward=-0.00614]


In [10]:
def testing(env, agent, seed=77):
    state, info = env.reset(seed=seed)
    env = gym.wrappers.RecordVideo(env=env, video_folder="../Videos", name_prefix="test", episode_trigger=lambda x: x % 2 == 0)

    init_state = process_state_image(state)

    state_frame_stack_queue = deque([init_state] * agent.frame_stack_num, maxlen=agent.frame_stack_num)
    total_reward = 0

    while True:
        current_state_frame_stack = generate_state_frame_stack_from_queue(state_frame_stack_queue)
        action = agent.act(current_state_frame_stack)

        reward = 0
        for _ in range(frame_stack_num-1):
            next_state, r, terminated, truncated, info = env.step(action)
            if terminated or truncated:
                done = True
            else:
                done = False
            reward += r
            if done:
                break

        total_reward += reward

        next_state = process_state_image(next_state)
        state_frame_stack_queue.append(next_state)

        if done:
            break

    print(f"Total reward after playing the game once: {total_reward}")
    env.close()
    return total_reward

In [11]:
# Play one recorded evaluation episode; the returned total reward is the cell output.
testing(env=env, agent=agent)

  logger.warn(


Moviepy - Building video c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Videos\test-episode-0.mp4.
Moviepy - Writing video c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Videos\test-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Videos\test-episode-0.mp4
Total reward after playing the game once: -75.43859649122798


-75.43859649122798