# Reinforcement Learning with Atari Games

In [1]:
import os
from dotenv import load_dotenv

from core.env_details import EnvDetails
from core.parameters import ModelParameters, DQNParameters
from agents.dqn import DQN
from models.cnn import CNNModel
import torch.optim as optim
import torch.nn as nn

## 1. Initial Setup

In [2]:
 # Create access to .env file (hyperparameters)
load_dotenv()

SEED = int(os.getenv('SEED'))
LEARNING_RATE = float(os.getenv('LEARNING_RATE'))
EPSILON = float(os.getenv('EPSILON'))
NUM_EPISODES = int(os.getenv('NUM_EPISODES'))

In [3]:
# Set environment class
env_details = EnvDetails(
    gym_name=os.getenv('ENV_1'),
    img_size=int(os.getenv('IMG_SIZE')),
    stack_size=int(os.getenv('STACK_SIZE'))
)

In [4]:
env_details

{'gym_name': 'ALE/SpaceInvaders-v5', 'name': 'SpaceInvaders', 'env': <FrameStack<ResizeObservation<GrayScaleObservation<TimeLimit<OrderEnforcing<AtariEnv<ALE/SpaceInvaders-v5>>>>>>>, 'obs_space': Box(0, 255, (4, 128, 128), uint8), 'action_space': Discrete(6), 'input_shape': (4, 128, 128), 'n_actions': 6, 'img_size': 128, 'stack_size': 4}

## 2. Model Creation

### 2a. Deep Q-Network (DQN)

In [5]:
# Set DQN hyperparameters
network = CNNModel(input_shape=env_details.input_shape, n_actions=env_details.n_actions, seed=SEED)

model_params = ModelParameters(
    network=network,
    optimizer=optim.Adam(network.parameters(), lr=LEARNING_RATE, eps=EPSILON),
    loss_metric=nn.MSELoss()
)

dqn_params = DQNParameters(
    gamma=float(os.getenv('GAMMA')),
    tau=float(os.getenv('TAU')),
    buffer_size=int(float(os.getenv('BUFFER_SIZE'))),
    batch_size=int(os.getenv('BATCH_SIZE')),
    update_steps=int(os.getenv('UPDATE_STEPS')),
    target_network=network
)

In [6]:
# Create DQN instance
dqn = DQN(env_details, model_params, dqn_params, SEED)

CUDA available. Device set to GPU.


In [7]:
# Train model
dqn.train(num_episodes=3, print_every=1)

Training agent on 3 episodes with 1k timesteps.
Buffer size: 100k, batch size: 32.
(1/3) Episode Score: 180, Train Loss: 0.00135
(2/3) Episode Score: 105, Train Loss: 0.00125
(3/3) Episode Score: 35, Train Loss: 0.00070
Training complete. Access metrics from 'logger' attribute.
