# Roblox RL NPC

In [10]:
import os, sys, platform, math, random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython.display import display
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import gymnasium as gym

# Choose the compute backend in order of preference: CUDA, then MPS,
# and finally the CPU when neither accelerator reports as available.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Device:", device)

# Import models defined in models.py
from models import DQN
from gym_env import GridWorldEnv, RBXEnv

Device: cpu


---

### Replay Memory
A fixed-size buffer that stores recently observed transitions; essentially, it is short-term memory for the RL agent. Its `sample()` method randomly selects a batch of transitions to be used for training.

In [None]:
# One recorded step of experience; fields are accessed by name.
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)


class ReplayMemory:
    """Bounded store of ``Transition`` records with random batch sampling."""

    def __init__(self, capacity):
        """Create an empty buffer holding at most ``capacity`` transitions."""
        # A deque built with ``maxlen`` discards its oldest entry once full.
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Build a ``Transition`` from ``args`` and append it to the buffer.

        ``args`` are given in field order: state, action, next_state, reward.
        """
        transition = Transition(*args)
        self.memory.append(transition)

    def sample(self, batch_size):
        """Return a list of ``batch_size`` stored transitions picked at random.

        Sampling is without replacement; ``random.sample`` raises
        ``ValueError`` when ``batch_size`` exceeds the number of stored
        transitions.
        """
        return random.sample(self.memory, k=batch_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

### Hyperparameters

| Parameter | Description |
|---|---|
| BATCH_SIZE | the number of transitions sampled from the replay buffer. |
| GAMMA | the discount factor as mentioned in the previous section. |
| EPS_START | the starting value of epsilon. |
| EPS_END | the final value of epsilon. |
| EPS_DECAY | controls the rate of exponential decay of epsilon; a higher value means a slower decay. |
| TAU | the update rate of the target network. |
| LR | the learning rate of the `AdamW` optimizer. |

In [7]:
# Number of transitions sampled from the replay buffer.
BATCH_SIZE = 128
# Discount factor.
GAMMA = 0.99
# Starting and final values of epsilon.
EPS_START = 0.9
EPS_END = 0.05
# Rate of exponential decay of epsilon; higher means a slower decay.
EPS_DECAY = 1_000
# Update rate of the target network.
TAU = 5e-3
# Learning rate of the AdamW optimizer.
LR = 0.0001