## Installing the packages

In [1]:
!pip install pybullet



## Importing the libraries

In [2]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

## Step 1: Initialize the Experience Replay Memory

In each training episode, we will leverage some instances from the replay memory to account for anomalous (state, action) pairs.

In [4]:
class ReplayBuffer(object):
    """Fixed-capacity store of transitions with uniform random sampling.

    Each transition is a 5-tuple ``(state, next_state, action, reward, done)``.
    Once ``max_size`` transitions are stored, each new transition overwrites
    an existing slot, cycling through the slots starting at index 0.
    """

    def __init__(self, max_size=1e6):
        """Create an empty buffer.

        Args:
            max_size: Maximum number of transitions to keep. A float such as
                ``1e6`` is accepted and truncated to ``int``.

        Raises:
            ValueError: If ``max_size`` truncates to less than 1.
        """
        capacity = int(max_size)
        if capacity < 1:
            raise ValueError("max_size must be at least 1")
        self.storage = []
        # Kept as an int so the write pointer is always a valid list index.
        self.max_size = capacity
        self.ptr = 0

    def add(self, transition):
        """Store one transition, overwriting a previous one when full.

        Args:
            transition: Tuple ``(state, next_state, action, reward, done)``.
        """
        if len(self.storage) >= self.max_size:
            # Buffer is full: overwrite the slot under the write pointer and
            # advance the pointer, wrapping around at capacity.
            self.storage[self.ptr] = transition
            self.ptr = (self.ptr + 1) % self.max_size
        else:
            self.storage.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly at random, with replacement.

        Args:
            batch_size: Number of transitions to draw.

        Returns:
            Tuple ``(states, next_states, actions, rewards, dones)`` of NumPy
            arrays, each with ``batch_size`` rows. ``rewards`` and ``dones``
            are reshaped to column vectors of shape ``(batch_size, 1)``.

        Raises:
            ValueError: If the buffer is empty.
        """
        if not self.storage:
            raise ValueError("Cannot sample from an empty ReplayBuffer")
        ind = np.random.randint(0, len(self.storage), size=batch_size)
        batch_states, batch_next_states, batch_actions, \
        batch_rewards, batch_dones = [], [], [], [], []
        for i in ind:
            state, next_state, action, reward, done = self.storage[i]
            # np.asarray avoids a copy when possible and, unlike
            # np.array(..., copy=False), does not raise on NumPy >= 2.0
            # when a copy turns out to be necessary.
            batch_states.append(np.asarray(state))
            batch_next_states.append(np.asarray(next_state))
            batch_actions.append(np.asarray(action))
            batch_rewards.append(np.asarray(reward))
            batch_dones.append(np.asarray(done))
        return (
            np.array(batch_states),
            np.array(batch_next_states),
            np.array(batch_actions),
            np.array(batch_rewards).reshape(-1, 1),
            np.array(batch_dones).reshape(-1, 1),
        )