In [None]:
import wandb

In [None]:
# Start the Weights & Biases run that the later cells log to.
wandb.init(
    project="lunarLander_gif_git",
    entity="rl_proj",
)

In [None]:
import gymnasium as gym
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from PIL import ImageFont, ImageDraw, Image
import cv2

# --- Notebook configuration -------------------------------------------------
DEVICE = "cpu"                 # torch device name for the model and state tensors
ACTION_SPACE = list(range(4))  # discrete action indices: [0, 1, 2, 3]
EPISODES = 100                 # number of episodes to roll out
STEPS = 250                    # upper bound on steps per episode
RENDER = True                  # NOTE(review): not referenced in the visible code

class ReinforceNetwork(nn.Module):
    """Small MLP policy that samples one discrete action per observation.

    Architecture: Linear(n_inputs, 16) -> tanh -> Linear(16, 32) -> tanh ->
    Linear(32, n_outputs), followed by a softmax over the last dimension.

    Args:
        n_inputs: Number of features in an observation.
        n_outputs: Number of discrete actions the policy can choose from.
    """

    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        self.fc1 = nn.Linear(n_inputs, 16)
        self.fc2 = nn.Linear(16, 32)
        self.fc3 = nn.Linear(32, n_outputs)

    def forward(self, x):
        """Sample an action for ``x`` and return it with its log-probability.

        Args:
            x: Observation tensor. A 1-D tensor gets a leading batch dimension
                added; a 2-D tensor must contain a single row, because exactly
                one action is sampled per call.

        Returns:
            A tuple ``(action, log_prob_action)`` where ``action`` is the index
            drawn by ``get_action`` and ``log_prob_action`` is a 0-dim tensor
            with the log-probability of that action under the current policy.
        """
        x = x.unsqueeze(0) if x.dim() == 1 else x
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        logits = self.fc3(x)
        actions = torch.softmax(logits, dim=-1)
        action = self.get_action(actions)
        # log_softmax on the logits is numerically safer than log(softmax(...)).
        log_prob_action = torch.log_softmax(logits, dim=-1).squeeze(0)[action]
        return action, log_prob_action

    def get_action(self, actions):
        """Draw one action index according to the probabilities in ``actions``.

        Args:
            actions: Probability tensor for a single observation, with or
                without a leading batch dimension of size 1.

        Returns:
            The sampled index in ``range(n_outputs)``, as returned by
            ``np.random.choice``.
        """
        probs = actions.squeeze(0).detach().cpu().numpy()
        # Sample from 0..len(probs)-1 so the choice always matches the
        # network's own output size; the result is used as an index into the
        # probability vector, so it must be a valid position in it.
        return np.random.choice(len(probs), p=probs)

# Text-overlay settings built on an OpenCV font constant.
# NOTE(review): none of these names are referenced in the visible code —
# presumably left over from a cv2-based text overlay; confirm before removing.
font = cv2.FONT_HERSHEY_SIMPLEX
bottomLeftCornerOfText = (10, 500)
fontScale = 1
fontColor = (255, 255, 255)
lineType = 2


# Build the policy network and restore the trained weights.
model = ReinforceNetwork(8, 4).to(DEVICE)
# map_location keeps the load working when the checkpoint was saved from a
# different device than DEVICE.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(
    torch.load(
        "/workspaces/RL_Project/outputs/best_parameters_24_11.pth",
        map_location=DEVICE,
    )
)
model.eval()

env = gym.make("LunarLander-v2", render_mode="rgb_array")
print(env.action_space, env.observation_space)
fig = plt.figure()
ims = []  # one [AxesImage] entry per rendered step, consumed by ArtistAnimation
total_rewards_all_episodes = []  # total reward of each episode

try:
    for episode in range(EPISODES):
        state, _ = env.reset()
        state = torch.tensor(state, dtype=torch.float32, device=DEVICE)
        episode_rewards = []
        for step in range(STEPS):
            # Frame shown for this step is rendered before the action is applied.
            img = env.render()

            # Pure inference: no autograd graph is needed for the rollout.
            with torch.no_grad():
                action, _ = model(state)

            # gymnasium's step() returns five values; the episode is over when
            # it is either terminated or truncated.
            state, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            state = torch.tensor(state, dtype=torch.float32, device=DEVICE)
            episode_rewards.append(reward)

            # Annotate the frame with step statistics and store it.
            # The frame is drawn on directly through PIL, with no colour-channel
            # conversion on the way in or out.
            pil_im = Image.fromarray(img)
            draw = ImageDraw.Draw(pil_im)
            draw.text((0, 0), f"Step: {step} Action: {action} Reward: {int(reward)} Total Rewards: {int(np.sum(episode_rewards))} done: {done}", fill="#FDFEFE")
            img = np.array(pil_im)
            # NOTE(review): every frame of every episode is kept in `ims`
            # (up to EPISODES * STEPS images), which can use a lot of memory.
            im = plt.imshow(img, animated=True)
            ims.append([im])

            if done:
                break

        total_rewards_all_episodes.append(np.sum(episode_rewards))
finally:
    # Release the environment even if the rollout fails part-way through.
    env.close()

In [None]:


# Plot the total reward obtained in each episode.
reward_fig, reward_ax = plt.subplots(figsize=(12, 6))
reward_ax.plot(total_rewards_all_episodes)
reward_ax.set_title("Reward return for episode")
reward_ax.set_xlabel("Episode")
reward_ax.set_ylabel("Reward return")
reward_ax.grid()

# Upload the reward curve (the current pyplot figure) to wandb as an image.
wandb.log({"Reward return for episode": wandb.Image(plt)})

plt.show()

# Save the frames collected during the rollout as an animated GIF.
Writer = animation.writers['pillow']
writer = Writer(fps=15, metadata={'artist': 'Me'}, bitrate=1800)
im_ani = animation.ArtistAnimation(
    fig,
    ims,
    interval=50,
    repeat_delay=3000,
    blit=True,
)
im_ani.save('gif_git.gif', writer=writer)

# Log the GIF to wandb.
wandb.log({"animation": wandb.Video("gif_git.gif", fps=4, format="gif")})

# Close the wandb run.
wandb.finish()