# Apprenticeship Learning via IRL - Deep Q Learning

## CartPole-v0

[CartPole-v0 Wiki](https://github.com/openai/gym/wiki/CartPole-v0)

In [4]:
import gym
import random
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple

In [2]:
# Class to store the experiences (transitions) of the agent as it interacts with the environment

class ReplayMemory(object):
    """
    Fixed-capacity circular buffer of transitions.

    Transitions are appended until `capacity` entries are stored; after that,
    each new transition overwrites the oldest stored one.
    """
    
    def __init__(self, capacity):
        """
        Create an empty replay memory.

        Parameters:
        - capacity: Maximum number of transitions the buffer can hold (must be positive).

        Raises:
        - ValueError: If capacity is not positive.
        """
        # A non-positive capacity can never hold a transition: push() would fail
        # later with an IndexError / ZeroDivisionError, so reject it up front.
        if capacity <= 0:
            raise ValueError("capacity must be positive, got {!r}".format(capacity))

        self.capacity = capacity   # maximum number of transitions the buffer can hold
        self.memory = []           # list to store the transitions
        self.position = 0          # index of the slot the next transition is written to

    def push(self, *args):
        """
        Save a transition in the replay memory.

        Parameters:
        - *args: Field values of the transition, forwarded to Transition(*args).
        """
        # Build the transition before touching the buffer: if the arguments are
        # invalid, this raises and no placeholder slot is left behind in memory.
        transition = Transition(*args)

        if len(self.memory) < self.capacity:                  # buffer not full yet: grow it by one slot
            self.memory.append(None)
        self.memory[self.position] = transition               # write into the current slot
        self.position = (self.position + 1) % self.capacity   # advance, wrapping around at capacity

    def sample(self, batch_size):
        """
        Sample a batch of transitions from the replay memory.

        Parameters:
        - batch_size: Number of transitions to draw.

        Returns:
        - List of batch_size stored transitions, drawn without replacement.

        Raises:
        - ValueError: If batch_size exceeds the number of stored transitions
          (raised by random.sample).
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """
        Return the current size of the replay memory.
        """
        return len(self.memory)

In [6]:
# Record type for one entry of the replay memory.
# Fields, in positional order: state, action, next_state, reward.
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)

In [5]:
# Neural Network for Deep Q Learning 

HIDDEN_LAYER = 64   # default number of neurons in each hidden layer

class DQN(nn.Module):
    """
    Fully connected Q-network: maps a state vector to one Q-value per action.
    """

    def __init__(self, n_observations=4, n_actions=2, hidden_size=None):
        """
        Initialize the DQN model.

        The model is a stack of four linear layers; every layer except the
        output layer is followed by a ReLU activation.

        Parameters:
        - n_observations: Number of input features (size of the state vector). Defaults to 4.
        - n_actions: Number of output neurons, one Q-value per action. Defaults to 2.
        - hidden_size: Number of neurons in each hidden layer. Defaults to the
          module-level HIDDEN_LAYER value at the time of construction.
        """
        
        super().__init__()

        # Resolve the default at call time (not at class-definition time) so that
        # a changed HIDDEN_LAYER is still picked up by DQN().
        if hidden_size is None:
            hidden_size = HIDDEN_LAYER
        
        # Attribute names and creation order are kept stable: they determine the
        # state_dict keys and the order in which the random initial weights are drawn.
        self.il = nn.Linear(n_observations, hidden_size)
        self.h1 = nn.Linear(hidden_size, hidden_size)
        self.h2 = nn.Linear(hidden_size, hidden_size)
        self.ol = nn.Linear(hidden_size, n_actions)

    def forward(self, x):
        """
        Define the forward pass of the model.

        Parameters:
        - x: Input state tensor whose last dimension has n_observations features.

        Returns:
        - Tensor of Q-values with n_actions entries along the last dimension
          (no activation is applied to the output layer).
        """
        x = F.relu(self.il(x))
        x = F.relu(self.h1(x))
        x = F.relu(self.h2(x))
        x = self.ol(x)
        return x