In [1]:
import copy
import gym
import torch

import numpy as np
import torch.nn.functional as F

from collections import deque, namedtuple
from IPython.display import HTML
from base64 import b64encode

from torch import Tensor, nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import IterableDataset
from torch.optim import AdamW

from pytorch_lightning import LightningModule, Trainer

from gym.wrappers import RecordVideo, RecordEpisodeStatistics

In [2]:
def display_video(episode=0):
  video_file = open(f'/content/videos/rl-video-episode-{episode}.mp4', "r+b").read()
  video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
  return HTML(f"<video width=600 controls><source src='{video_url}'></video>")

In [3]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

cuda:0


- Create Deep Q Learning

In [4]:
class DQN (nn.Module):
    def __init__(self, n_actions, hidden_size, obs_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden_size), # Camada linear com 4 entradas e 8 saídas
            nn.ReLU(), # Função de ativação ReLU
            nn.Linear(hidden_size, hidden_size),# Camada linear com 4 entradas e 8 saídas
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions),
        )

    def forward(self,x):
        return self.net(x.float())

- Create a Policy

In [5]:
## Create a Policy
#  técnica amplamente usada em Reinforcement Learning (RL) para balancear exploração e exploração durante o treinamento de agentes.
def epsilon_greedy(state, env, net, epsilon=0.0):
    if np.random.random() < epsilon:
        action = env.action_space.sample()  # Escolhe uma ação aleatória
    else:
        state = torch.tensor([state]).to(device)  # Converte o estado para tensor e move para o dispositivo (CPU/GPU)
        q_values = net(state)  # Calcula os valores Q(s, a) usando a rede neural
        _, action = torch.max(q_values, dim=1)  # Seleciona a ação com o maior valor Q
        action = int(action.item())  # Converte a ação para um inteiro
    return action  # Retorna a ação escolhida

- Create Buffer replay

In [6]:
#Create Buffer replay
#O principal motivo do uso do Replay Buffer em algoritmos de Reinforcement Learning (RL), especialmente no Deep Q-Learning (DQN), 
#é quebrar a correlação entre as amostras de treinamento, 
#o que leva a um aprendizado mais estável e eficiente. 
#Ele também permite o reaproveitamento de experiências, reduzindo a necessidade de interagir continuamente com o ambiente.

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        return len(self.buffer)
    
    def append(self, experience):
        self.buffer.append(experience)
    
    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

In [7]:
class RLDataset(IterableDataset):
    def __init__(self, buffer, sample_size = 200): 
        self.buffer = buffer
        self.sample_size = sample_size
    

    def __iter__(self):
        for experience in self.buffer.sample(self.sample_size):
            yield experience

In [13]:
##Create Enviroment

def create_enviroment(name):
    env = gym.make(name,  render_mode="rgb_array")
    env = RecordVideo(env, video_folder='./videos')
    return env


env = create_enviroment("LunarLander-v2")

# Reset the environment to start
state = env.reset()
# Run for 1000 timesteps
for _ in range(1000):
      # Render the environment
    action = env.action_space.sample()  # Take a random action
    # print("Action taken:", action)


    # Do this action in the environment and get
    # next_state, reward, done and info
    _, observation, reward, done, info = env.step(action)
    # print('Observation Space: ', observation)
    # print('Reward: ', reward)


    # If the episode is done (CartPole has fallen), reset the environment
    if done:
        state = env.reset()

env.close()  # Close the rendering window




MoviePy - Building video /home/igor/projects/reinforcement_learning/videos/rl-video-episode-0.mp4.
MoviePy - Writing video /home/igor/projects/reinforcement_learning/videos/rl-video-episode-0.mp4



                                                                       

MoviePy - Done !
MoviePy - video ready /home/igor/projects/reinforcement_learning/videos/rl-video-episode-0.mp4




MoviePy - Building video /home/igor/projects/reinforcement_learning/videos/rl-video-episode-1.mp4.
MoviePy - Writing video /home/igor/projects/reinforcement_learning/videos/rl-video-episode-1.mp4



                                                            

MoviePy - Done !
MoviePy - video ready /home/igor/projects/reinforcement_learning/videos/rl-video-episode-1.mp4


