<a href="https://colab.research.google.com/github/DHANA5982/Lunar_Landing_DQN/blob/main/Gynasium_Lunar_Landing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lunar Landing

### Installing Gymnasium

In [1]:
# Use %pip (not !pip) so packages are installed into the environment of the running kernel.
# Extras are quoted so the shell never treats the square brackets as a glob pattern.
%pip install gymnasium
%pip install 'gymnasium[atari, accept-rom-license]'
# swig must be present before the box2d extra is installed.
!apt-get install -y swig
%pip install 'gymnasium[box2d]'

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  swig4.0
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 34 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]
Fetched 1,116 kB in 1s (860 kB/s)
Selecting previously unselected package swig4.0.
(Reading database ... 126102 files and directories currently installed.)
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubunt

In [2]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as f
import torch.autograd as autograd
from torch.autograd import Variable
from collections import deque, namedtuple

## Constructing Neural Network

In [3]:
class NeuralNetwork(nn.Module):
  """Fully connected network mapping a state vector to one value per action.

  Two hidden layers of 64 units, each followed by ReLU, feed a linear output
  layer of width `action_size`.
  """

  def __init__(self, state_size, action_size, seed = 42):
    """Seed torch's global RNG, then build the three linear layers.

    Args:
      state_size: Number of input features.
      action_size: Number of output units.
      seed: Value passed to `torch.manual_seed` before the layers are created.
    """
    super().__init__()
    # The return value of torch.manual_seed is kept on the instance.
    self.seed = torch.manual_seed(seed)
    hidden_units = 64
    self.fc1 = nn.Linear(state_size, hidden_units)
    self.fc2 = nn.Linear(hidden_units, hidden_units)
    self.fc3 = nn.Linear(hidden_units, action_size)

  def forward(self, state):
    """Return the output-layer values for `state` (no activation on the output)."""
    hidden = state
    for layer in (self.fc1, self.fc2):
      hidden = f.relu(layer(hidden))
    return self.fc3(hidden)

## Setting up the Environment

In [9]:
import gymnasium as gym

# Create the Lunar Lander environment; the sizes below are read from it.
env = gym.make('LunarLander-v3')

# Number of actions exposed by the environment's action space.
action_size = env.action_space.n

# Shape of a single observation; its first entry is used as the state size.
state_shape = env.observation_space.shape
state_size = state_shape[0]

print('state shape: ',state_shape)
print('Number of actions: ', action_size)

state shape:  (8,)
Number of actions:  4


## Initialising Hyperparameters

In [11]:
# Learning rate handed to the Adam optimiser.
learning_rate_alpha = 0.0005
# Number of experiences drawn from the replay buffer per learning step.
mini_batch_size = 100
# Discount applied to the next-state value when building the Q-target.
discount_factor_gamma = 0.99
# Maximum number of experiences kept in the replay buffer.
replay_buffer_size = 100_000
# Weight of the local network in each soft update of the target network.
interpolation_parameter_tau = 0.001

## Implementing Experience Replay

In [17]:
class RepalyMemory(object):
  """Fixed-capacity store of experiences with uniform random sampling.

  Each stored event is indexed as `(state, action, reward, next_state, done)`.
  Once more than `capacity` events are held, the oldest one is discarded.
  """

  def __init__(self, capacity):
    """Create an empty buffer that keeps at most `capacity` events."""
    self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    self.capacity = capacity
    self.memory = []

  def push(self, event):
    """Append `event`, evicting the oldest entry when the buffer overflows."""
    self.memory.append(event)
    # The limit is the configured capacity; comparing the length against the
    # list itself would raise TypeError.
    if len(self.memory) > self.capacity:
      del self.memory[0]

  def sample(self, batch_size):
    """Draw `batch_size` distinct events at random and batch them as tensors.

    Every field is stacked row-wise with `np.vstack`, so fields stored as
    scalars (action, reward, done) come back as column tensors of shape
    `(batch_size, 1)`.

    Args:
      batch_size: Number of events to draw.

    Returns:
      Tuple `(states, actions, rewards, next_states, dones)` of tensors on
      `self.device`. `actions` is int64; the others are float32, with `dones`
      encoded as 0.0 / 1.0.

    Raises:
      ValueError: Raised by `random.sample` when `batch_size` exceeds the
        number of stored events.
    """
    experiences = [e for e in random.sample(self.memory, k = batch_size) if e is not None]

    def stack(field):
      # Gather one field of every sampled event into a 2-D array.
      return np.vstack([e[field] for e in experiences])

    states = torch.from_numpy(stack(0)).float().to(self.device)
    actions = torch.from_numpy(stack(1)).long().to(self.device)
    rewards = torch.from_numpy(stack(2)).float().to(self.device)
    next_states = torch.from_numpy(stack(3)).float().to(self.device)
    # Booleans are converted to uint8 first so torch.from_numpy yields a numeric tensor.
    dones = torch.from_numpy(stack(4).astype(np.uint8)).float().to(self.device)
    return states, actions, rewards, next_states, dones

## Implementing DQN Class

In [15]:
class Agent():
  """DQN agent holding a local Q-network, a target Q-network and a replay memory.

  The local network is trained by gradient descent; the target network is
  moved towards it by a soft update after every learning step.
  """

  # A learning step is attempted once every this many calls to `step`.
  learn_every = 4

  def __init__(self, state_size, action_size):
    """Build both networks, the Adam optimiser and an empty replay memory.

    Args:
      state_size: Length of a state vector (network input width).
      action_size: Number of actions (network output width).
    """
    self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    self.state_size = state_size
    self.action_size = action_size
    self.local_QNetwork = NeuralNetwork(state_size, action_size).to(self.device)
    self.target_QNetwork = NeuralNetwork(state_size, action_size).to(self.device)
    # Only the local network is optimised; the target network changes via soft_update.
    self.optimizer = optim.Adam(self.local_QNetwork.parameters(), lr = learning_rate_alpha)
    self.memory = RepalyMemory(replay_buffer_size)
    self.t_step = 0

  def step(self, state, action, reward, next_state, done):
    """Store one transition and, every `learn_every` calls, learn from a mini-batch.

    Learning is skipped until the memory holds more than `mini_batch_size`
    experiences.
    """
    self.memory.push((state, action, reward, next_state, done))
    self.t_step = (self.t_step + 1) % self.learn_every
    if self.t_step == 0 and len(self.memory.memory) > mini_batch_size:
      # Sample exactly the configured batch size so it stays consistent with
      # the threshold checked above.
      experiences = self.memory.sample(mini_batch_size)
      self.learn(experiences, discount_factor_gamma)

  def act(self, state, epsilon = 0.):
    """Choose an action for `state` with an epsilon-greedy rule.

    Args:
      state: NumPy array holding a single state (a batch axis is added here).
      epsilon: Probability threshold; a random action is returned whenever
        `random.random()` is not greater than it.

    Returns:
      Index of the chosen action (a NumPy integer).
    """
    state = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
    # Evaluate without tracking gradients, then put the network back in training mode.
    self.local_QNetwork.eval()
    with torch.no_grad():
      action_values = self.local_QNetwork(state)
    self.local_QNetwork.train()
    if random.random() > epsilon:
      # Move to host memory before converting to NumPy (required for CUDA tensors).
      return np.argmax(action_values.cpu().numpy())
    return random.choice(np.arange(self.action_size))

  def learn(self, experiences, discount_factor_gamma):
    """Take one optimiser step on the local network, then soft-update the target.

    Args:
      experiences: Tuple `(states, actions, rewards, next_states, dones)` of
        tensors, in the order produced by the replay memory's `sample`.
        `actions` is used as a `gather` index along dim 1.
      discount_factor_gamma: Discount applied to the next-state value.
    """
    # Unpack in the same order the replay memory returns the batch.
    states, actions, rewards, next_states, dones = experiences
    # Highest target-network value of each next state, kept as a column.
    next_QTarget = self.target_QNetwork(next_states).detach().max(1)[0].unsqueeze(1)
    # (1 - dones) removes the bootstrapped term where dones is 1.
    QTarget = rewards + (discount_factor_gamma * next_QTarget * (1 - dones))
    # Local-network value of the action stored for each row.
    QExpected = self.local_QNetwork(states).gather(1, actions)
    loss = f.mse_loss(QExpected, QTarget)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.soft_update(self.local_QNetwork, self.target_QNetwork, interpolation_parameter_tau)

  def soft_update(self, local_model, target_model, interpolation_parameter):
    """Blend local parameters into the target model in place.

    Each target parameter becomes
    `interpolation_parameter * local + (1 - interpolation_parameter) * target`.
    """
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
      blended = interpolation_parameter * local_param.data + (1.0 - interpolation_parameter) * target_param.data
      target_param.data.copy_(blended)

## Initialise the DQN Agent

In [18]:
# Build the agent from the state and action sizes computed earlier in the notebook.
agent = Agent(state_size = state_size, action_size = action_size)

## Training the DQN Agent

In [19]:
# Episode and per-episode step limits.
number_of_episodes = 2_000
maximum_number_timesteps_per_episode = 1_000

# Epsilon settings: starting value, ending value and decay value.
# NOTE(review): how these are applied depends on the training loop, which is not visible here.
epsilon_starting_value, epsilon_ending_value, epsilon_decay_value = 1.0, 0.01, 0.995
epsilon = epsilon_starting_value

# Bounded window: keeps at most the 100 most recently appended entries.
scores_on_100_episodes = deque(maxlen = 100)