# Play Doom Level using Trained PPO Agent  

In [None]:
from utils.env import make_doom_env
from agents.doom_ppo_agent import DoomPpoAgent
import gym
import torch

# Creating Environment
# SyncVectorEnv receives one entry per sub-environment; each entry is whatever
# make_doom_env returns (presumably an env-constructing callable -- confirm
# in utils.env).
num_envs = 1
envs = gym.vector.SyncVectorEnv([
    make_doom_env(level_config_path='vizdoom/scenarios/basic.cfg',
                  record_episodes=False,
                  render=True)
    for _ in range(num_envs)
])

# Preferring GPU: CUDA first, then Apple's MPS backend, otherwise CPU.
# torch.backends.mps.is_available() is used instead of the deprecated
# torch.has_mps flag, which only reports whether PyTorch was *built* with MPS
# support and can therefore select a device that is unusable at runtime.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Setting up agent
# Alternative checkpoint from another training run (swap into models_path to use it):
#   './models/doom_ppo_agent/training_run_2023_07_09_16_55_38/checkpoint_step_38912'
agent = DoomPpoAgent(envs.single_observation_space,
                     envs.single_action_space,
                     models_path='./models/doom_ppo_agent/training_run_2023_07_07_02_24_27/checkpoint_step_292864')
agent.to(device)

# Playing config
number_of_episodes = 50

In [None]:
import time
import torch
import numpy as np

# Play `number_of_episodes` episodes with the trained agent, pacing the loop
# with short sleeps so the rendered game can be watched.
for episode in range(number_of_episodes):
    observation = envs.reset()
    # Sub-environments that have already completed this episode. A cumulative
    # mask is required because the vector env reports `done` for a single step
    # only and then auto-resets that sub-environment; testing the per-step
    # flags with .all() would only end the loop if every env finished on the
    # very same step.
    finished = np.zeros(num_envs, dtype=bool)
    total_reward = 0

    while not finished.all():
        # Getting next action; the other outputs of the forward pass are not
        # needed for playback.
        with torch.no_grad():
            action, _, _, _ = agent.forward(torch.Tensor(observation).to(device))
        observation, reward, done, _ = envs.step(action.cpu().numpy())

        # Only count rewards of sub-environments still in their first episode,
        # so an auto-restarted episode does not leak into the total.
        total_reward = total_reward + reward * ~finished
        finished = np.logical_or(finished, done)

        # Throttle stepping to roughly 30 steps per second.
        time.sleep(1/30)

    print(f"Total reward for episode {episode} is {total_reward}")
    # Short pause between episodes.
    time.sleep(0.25)

In [None]:
# Close the vectorized environment created in the first cell once playback is
# finished. Run this cell even if the play loop was interrupted.
envs.close()