In [1]:
# References:
# https://github.com/andywu0913/OpenAI-GYM-CarRacing-DQN/tree/master
# https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html
# https://github.com/pytorch/tutorials/blob/main/intermediate_source/reinforcement_q_learning.py

In [2]:
import gym
from collections import deque
import torch
import pandas as pd

from agent import Agent
from processing import process_state_image, generate_state_frame_stack_from_queue

### Variables

In [3]:
# Pick the CUDA device when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [4]:
# Folder of the training run to evaluate. The cells below read
# `configurations.csv` and `trained_model.pth` from it and write the
# recorded video into a sub-folder of it. Point this at the run you want to test.
folder = "runs/run_20240703161917"

In [5]:
# Load the settings saved with the run: one row per setting, with its name in
# the `Parameter` column and its value in the `Value` column.
config_df = pd.read_csv(folder + '/configurations.csv')
configurations = config_df.set_index('Parameter')['Value'].to_dict()

In [6]:
# Unpack the saved settings into typed notebook-level variables.
# Every value is cast explicitly: all settings come from a single CSV column,
# so their runtime type depends on how pandas inferred that column's dtype.

# Integer-valued settings.
seed = int(configurations['seed'])
episodes = int(configurations['episodes'])
frame_stack_num = int(configurations['frame_stack_num'])
memory_size = int(configurations['memory_size'])
batch_size = int(configurations['batch_size'])
update_every_n = int(configurations['update_every_n'])
negative_reward_counter_n = int(configurations['negative_reward_counter_n'])

# Real-valued settings; cast so the agent never receives them as strings.
gamma = float(configurations['gamma'])
epsilon = float(configurations['epsilon'])
epsilon_min = float(configurations['epsilon_min'])
epsilon_decay = float(configurations['epsilon_decay'])
learning_rate = float(configurations['learning_rate'])

In [7]:
# Discrete-action CarRacing environment that renders to RGB arrays
# (the render mode the video recorder below consumes).
env = gym.make('CarRacing-v2', continuous=False, render_mode='rgb_array')

# Rebuild the agent with the hyper-parameters stored alongside the run,
# then restore the trained weights from the run folder.
testing_agent = Agent(
    action_space=env.action_space,
    frame_stack_num=frame_stack_num,
    memory_size=memory_size,
    gamma=gamma,
    epsilon=epsilon,
    epsilon_min=epsilon_min,
    epsilon_decay=epsilon_decay,
    learning_rate=learning_rate,
    seed=seed,
)
testing_agent.load(folder + '/trained_model.pth')

In [8]:
def testing(env, test, seed, video_folder=None):
    """Play one recorded episode with a trained agent and return its total reward.

    Parameters
    ----------
    env
        Environment to play in. It is wrapped in ``gym.wrappers.RecordVideo``
        for the episode and closed before the function returns, including when
        an error interrupts the episode.
    test
        Agent exposing ``frame_stack_num`` and ``act(state, testing=True)``.
    seed
        Seed forwarded to ``env.reset``.
    video_folder : str, optional
        Directory the video is written to. When omitted, ``folder + '/Video'``
        is used, where ``folder`` is the notebook-level run folder.

    Returns
    -------
    The sum of all rewards collected during the episode.
    """
    if video_folder is None:
        video_folder = folder + '/Video'

    # Wrap *before* resetting so the recorder observes the reset and captures
    # the episode from its very first frame.
    env = gym.wrappers.RecordVideo(env=env, video_folder=video_folder, video_length=0)

    try:
        state, info = env.reset(seed=seed)
        init_state = process_state_image(state)

        # Start with a stack made entirely of copies of the first frame.
        state_frame_stack_queue = deque([init_state] * test.frame_stack_num, maxlen=test.frame_stack_num)
        total_reward = 0

        # Every chosen action is repeated for several env steps. Take the count
        # from the agent (not a notebook global) and always step at least once,
        # so `next_state` and `done` are defined even when frame_stack_num == 1.
        steps_per_action = max(1, test.frame_stack_num - 1)

        done = False
        while not done:
            current_state_frame_stack = generate_state_frame_stack_from_queue(state_frame_stack_queue)
            action = test.act(current_state_frame_stack, testing=True)

            reward = 0
            for _ in range(steps_per_action):
                next_state, r, terminated, truncated, info = env.step(action)
                reward += r
                done = terminated or truncated
                if done:
                    break

            total_reward += reward

            # Only the last frame of the repeated action enters the stack.
            state_frame_stack_queue.append(process_state_image(next_state))

        print(f"Total reward after playing the game once: {total_reward}")
    finally:
        # Release the recorder and the environment even if the episode fails.
        env.close()

    return total_reward

In [9]:
# Play and record a single evaluation episode; the trailing semicolon is
# meant to keep the returned reward from being echoed as the cell output.
testing(env=env, test=testing_agent, seed=seed);

Moviepy - Building video c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Training\runs\run_20240703161917\Videos\rl-video-episode-0.mp4.
Moviepy - Writing video c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Training\runs\run_20240703161917\Videos\rl-video-episode-0.mp4



                                                                

Moviepy - Done !
Moviepy - video ready c:\Users\fabia\Desktop\Studium\RL\RL_CarRacing\Training\runs\run_20240703161917\Videos\rl-video-episode-0.mp4
Total reward after playing the game once: -26.315789473684077




-26.315789473684077