In [None]:
import os
import gym
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from collections import namedtuple, deque
import time
from ale_py import ALEInterface
import imageio
import matplotlib.pyplot as plt
import warnings
import logging
from utils_dueling import *

In [1]:

# Quiet the noisy libraries so the training output stays readable.
# TF_CPP_MIN_LOG_LEVEL: 0 = all messages, 1 = filter out INFO,
# 2 = filter out WARNING, 3 = filter out ERROR.
# NOTE(review): TensorFlow is imported in an earlier cell, so this variable may
# be set too late to affect its native logging — confirm.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')

# Python-side TensorFlow logger: only errors and above.
tf_logger = tf.get_logger()
tf_logger.setLevel(logging.ERROR)

# Hide every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# gym's logger: only errors and above.
gym_logger = logging.getLogger('gym')
gym_logger.setLevel(logging.ERROR)

2023-12-13 10:17:12.186924: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-13 10:17:13.034613: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-13 10:17:13.034750: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-13 10:17:13.239486: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-13 10:17:14.048728: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-13 10:17:14.061946: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
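# API key redacted: never store credentials in the notebook. Provide the key through the WANDB_API_KEY environment variable or `wandb login`, and revoke the key that was previously written here.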

In [3]:
import wandb

# Open the Weights & Biases run that the training and evaluation cells log to.
wandb_run_options = dict(project="DuelingDQN", entity="rl_proj")
wandb.init(**wandb_run_options)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m1630423[0m ([33mrl_proj[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:

# Train the Dueling DQN agent on Assault-v4.
#
# Relies on names from earlier cells: Config, DuelingDQN, ReplayMemory,
# take_action and optimize_model (presumably supplied by the star import from
# utils_dueling — confirm), plus an already-initialised wandb run.

# NOTE(review): TensorFlow is already imported at this point, so this setting
# may come too late to change its native logging — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Seed every global RNG the run may draw from. Python's `random` is seeded as
# well in case action selection / replay sampling use it (assumption — the
# helpers are not visible here).
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

ale = ALEInterface()

config = Config()

env = gym.make("Assault-v4", render_mode="rgb_array")
n_actions = env.action_space.n

# Online network, target network and replay buffer.
model = DuelingDQN(n_actions)
model_target = DuelingDQN(n_actions)
memory = ReplayMemory(config.MEMORY_SIZE)

optimizer = keras.optimizers.Adam(learning_rate=config.LEARNING_RATE, clipnorm=1.0)
loss_function = keras.losses.Huber()

episode_rewards = []
losses = []

best_reward = 0
best_episode = 0
best_frames = []

epsilon = config.EPSILON_START

try:
    for episode in range(config.EPISODES):
        # Keep the info dict returned by reset() instead of a hard-coded one.
        state, info = env.reset()
        state = state / 255.0  # scale pixel values to [0, 1]
        done = False
        episode_reward = 0
        steps = 0
        frames = []

        # Stop when the environment ends the episode or no lives remain. If the
        # "lives" key is absent, rely on `done` alone rather than raising.
        while not done and info.get("lives", 1) > 0:
            action = take_action(state, epsilon, env, model)
            next_state, reward, terminated, truncated, info = env.step(action)[:5]
            # A truncated episode must also end the loop: the environment has to
            # be reset before it can be stepped again.
            done = bool(terminated or truncated)
            next_state = next_state / 255.0

            # Store only `terminated` as the terminal flag, as before, so that a
            # time-limit truncation is not recorded as a true terminal state.
            memory.push(state, action, next_state, reward, terminated)
            optimize_model(memory, config, model, model_target, n_actions, loss_function, optimizer)

            # Keep every rendered frame so the best episode can be saved as a GIF.
            frames.append(env.render())

            state = next_state
            episode_reward += reward

            if reward != 0:
                print("step: ", steps, "action: ", action, " reward: ", reward)
                print("Lives: ", info.get("lives"))

            steps += 1

        print(f"\nEpisodio: {episode+1}, Recompensa: {episode_reward}, Epsilon: {epsilon}")

        if episode_reward > best_reward:
            best_reward = episode_reward
            best_episode = episode + 1
            # A subclassed model must be built (called once) before saving.
            if not model.built:
                dummy_input = np.zeros((1, *env.observation_space.shape))
                model(dummy_input)
            # TODO: change the path to the duelingDQN folder.
            model_save_path = "./dueling_model"
            model.save(model_save_path)
            print("Model saved")

            gif_path = f"./episode_{episode+1}_reward_{episode_reward}.gif"
            imageio.mimsave(gif_path, frames, format='GIF', fps=30)

        # Multiplicative epsilon decay, applied once per episode, floored at EPSILON_END.
        epsilon = max((epsilon * config.EPSILON_DECAY_RATE), config.EPSILON_END)

        wandb.log({"episode": episode + 1, "reward": episode_reward, "epsilon": epsilon})
        episode_rewards.append(episode_reward)

        # Periodically copy the online weights into the target network.
        if (episode + 1) % config.TARGET_UPDATE == 0:
            model_target.set_weights(model.get_weights())
finally:
    # Release the emulator even if training is interrupted (e.g. KeyboardInterrupt).
    env.close()


In [5]:
# Plot the rewards collected by the training loop. `episode` is the loop
# variable left over from the training cell, so that cell must have run first.
# NOTE(review): plot_rewards is presumably provided by the star import from
# utils_dueling; the meaning of its second argument is not visible here — confirm.
plot_rewards(episode_rewards, episode)


In [7]:
#TEST THE SAVED MODEL
# Runs the saved network with a purely greedy policy (argmax over the model
# output), records the reward of every episode, saves a GIF whenever a new best
# reward is reached, and logs the metrics to the active wandb run.
# Requires wandb.init(...) to have been called in an earlier cell.
import gym
import imageio
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import wandb

N_TEST_EPISODES = 500

dueling_model = keras.models.load_model("./dueling_model")

env = gym.make("Assault-v4", render_mode="rgb_array")
n_actions = env.action_space.n

rewards_per_episode = []
best_reward = 0

try:
    for episode in range(N_TEST_EPISODES):
        state, info = env.reset()
        state = state / 255.0  # same pixel scaling as during training
        done = False
        episode_reward = 0
        frames = []

        while not done:
            # verbose=0 suppresses the per-call progress bar printed by predict().
            q_values = dueling_model.predict(state[np.newaxis, ...], verbose=0)
            action = int(np.argmax(q_values))
            next_state, reward, terminated, truncated, info = env.step(action)[:5]
            # End the episode on truncation as well as on termination.
            done = bool(terminated or truncated)
            next_state = next_state / 255.0

            frames.append(env.render())

            state = next_state
            episode_reward += reward

            if reward != 0:
                print("action: ", action, " reward: ", reward)
                print("Lives: ", info.get("lives"))

        rewards_per_episode.append(episode_reward)
        print(f"Episode {episode + 1}: Reward = {episode_reward}")

        # The policy is greedy, so the exploration rate is 0 by construction; this
        # avoids reading the `epsilon` variable left over from the training cell.
        metrics = {"episode": episode + 1, "reward": episode_reward, "epsilon": 0.0}

        if episode_reward > best_reward:
            best_reward = episode_reward

            gif_path = f"./test_episode_{episode+1}_reward_{episode_reward}.gif"
            imageio.mimsave(gif_path, frames, fps=30)
            # Attach the GIF only when one was written for this episode, so that
            # gif_path is never read before assignment or re-uploaded when stale.
            metrics["episode_gif"] = wandb.Video(gif_path, fps=4, format="gif")

        # Log episode metrics (and the GIF, when present) to wandb.
        wandb.log(metrics)
finally:
    # Release the emulator even if evaluation is interrupted.
    env.close()

# Plot the reward obtained in each episode.
plt.plot(rewards_per_episode)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Reward per Episode')
plt.show()



  if not isinstance(terminated, (bool, np.bool8)):




KeyboardInterrupt: 