In [1]:
from dataloader import AtariDataset
import gym
import torch.nn as nn
import torch
import numpy as np
import random
import tqdm
from tqdm import tqdm
import torch.nn.functional as F
from torch.optim import optimizer
import matplotlib.pyplot as plt
from IPython import display as ipythondisplay

## SEEDING

In [2]:
def reseed(seed):
  """Seed the PyTorch, Python `random` and NumPy global RNGs with `seed`."""
  # Same order as before: torch first, then the stdlib, then NumPy.
  for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(seed)


# Notebook-wide seed; later cells reuse `seed` for the environment and agents.
seed = 42
reseed(seed)

## LOAD DATA

In [3]:
# Build the dataset wrapper for the "atari_v1" data and unpack the result of
# compile_data() into the observations and actions used by later cells.
# NOTE(review): the container types/shapes of `observations` and `actions`
# are not visible here — confirm against dataloader.AtariDataset.
dataloader = AtariDataset("atari_v1")
observations, actions = dataloader.compile_data()

1


## MAKE ENVIRONMENT

In [4]:
def make_env(env_id, seed=25):
    """Create a grayscale gym environment with no render mode and seed it.

    Args:
        env_id: Environment id passed to `gym.make`.
        seed: Seed applied to the environment and to both of its spaces.

    Returns:
        The seeded environment.
    """
    environment = gym.make(env_id, render_mode=None, obs_type='grayscale')
    environment.seed(seed)
    # The spaces are seeded separately from the environment, action space first.
    for space in (environment.action_space, environment.observation_space):
        space.seed(seed)
    return environment


env = make_env("SpaceInvaders-v0", seed=seed)
print(env.action_space.n)
print(env.observation_space.shape)




6
(210, 160)


A.L.E: Arcade Learning Environment (version 0.7.5+db37282)
[Powered by Stella]


## Train BC

In [5]:
from model import SpaceInvLearner
import bc

# Learner policy, constructed from the environment created above.
learner = SpaceInvLearner(env)

# Train the learner on the loaded observations/actions for 10 epochs.
# The call is the last expression of the cell, so its return value is what the
# notebook displays below the training log.
# NOTE(review): `checkpoint_path` presumably is where bc.train saves the
# trained weights — confirm in bc.py.
bc.train(
    learner=learner,
    observations=observations,
    actions=actions,
    num_epochs=10,
    checkpoint_path="models/bc_learner.pth",
)

Training the learner
Training for 10 epochs


 10%|█         | 1/10 [00:00<00:04,  1.92it/s]

Epoch 0, Loss: 0.320985439909767


 20%|██        | 2/10 [00:00<00:03,  2.19it/s]

Epoch 1, Loss: 0.15203793015841832


 30%|███       | 3/10 [00:01<00:03,  2.28it/s]

Epoch 2, Loss: 0.11707441591602658


 40%|████      | 4/10 [00:01<00:02,  2.34it/s]

Epoch 3, Loss: 0.0897348363447287


 50%|█████     | 5/10 [00:02<00:02,  2.36it/s]

Epoch 4, Loss: 0.07815708242093747


 60%|██████    | 6/10 [00:02<00:01,  2.39it/s]

Epoch 5, Loss: 0.06733066741372029


 70%|███████   | 7/10 [00:02<00:01,  2.40it/s]

Epoch 6, Loss: 0.06035556291653814


 80%|████████  | 8/10 [00:03<00:00,  2.40it/s]

Epoch 7, Loss: 0.05644254338033301


 90%|█████████ | 9/10 [00:03<00:00,  2.41it/s]

Epoch 8, Loss: 0.05239081545728374


100%|██████████| 10/10 [00:04<00:00,  2.36it/s]

Epoch 9, Loss: 0.05056144293562604





SpaceInvLearner(
  (fc1): Linear(in_features=33600, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=256, bias=True)
  (fc_out): Linear(in_features=256, out_features=6, bias=True)
)

In [6]:
# Roll the trained learner out for a single episode and report the total
# (undiscounted) reward it collects.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

total_learner_reward = 0
done = False
obs = env.reset()
while not done:
    # Convert the frame straight to a float32 tensor on `device` and add a
    # leading batch dimension. This yields the same (1, *obs.shape) float32
    # input as `torch.Tensor([obs]).to(device)` but avoids building a tensor
    # from a Python list of ndarrays on every step, which is the slow path
    # flagged by the warning captured in this cell's saved output.
    obs_batch = torch.as_tensor(obs, dtype=torch.float32, device=device).unsqueeze(0)
    with torch.no_grad():  # inference only; no gradients needed
        action = learner.get_action(obs_batch)
    obs, reward, done, info = env.step(action)
    total_learner_reward += reward
    # No explicit `break` needed: the `while not done` condition ends the loop.

print(total_learner_reward)

  action = learner.get_action(torch.Tensor([obs]).to(device))


410.0


## LOAD EXPERT

In [9]:
from expert.agents.dqn_agent import DQNAgent
from expert.models.dqn_cnn import DQNCnn

# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'models.dqn_cnn'". The failing import
# is not one of the two lines above, so it presumably comes from inside the
# expert package or from loading the checkpoint — confirm and fix there.

# --- Expert DQN configuration -------------------------------------------------
INPUT_SHAPE = (4, 84, 84)
ACTION_SIZE = env.action_space.n
SEED = seed

# Learning hyperparameters
GAMMA = 0.99           # discount factor
LR = 0.0001            # learning rate
TAU = 1e-3             # for soft update of target parameters

# Replay / update schedule
BUFFER_SIZE = 100000   # replay buffer size
BATCH_SIZE = 64        # update batch size
UPDATE_EVERY = 1       # how often to update the network
UPDATE_TARGET = 10000  # threshold after which replay is started

# Epsilon schedule (defined here but not passed to DQNAgent below)
EPS_START = 0.99       # starting value of epsilon
EPS_END = 0.01         # ending value of epsilon
EPS_DECAY = 100        # rate by which epsilon is decayed

# Positional argument order must match DQNAgent's constructor signature.
agent = DQNAgent(
    INPUT_SHAPE,
    ACTION_SIZE,
    SEED,
    device,
    BUFFER_SIZE,
    BATCH_SIZE,
    GAMMA,
    LR,
    TAU,
    UPDATE_EVERY,
    UPDATE_TARGET,
    DQNCnn,
)

agent.load_model("models/expert_DQN.pth")

ModuleNotFoundError: No module named 'models.dqn_cnn'

## DAgger Implementation

In [None]:
import dagger

# Run DAgger with the BC-trained learner and the loaded expert agent, starting
# from empty observation/action lists.
# NOTE(review): presumably `interact` aggregates new data into these lists and
# checkpoints the learner to `checkpoint_path` — confirm in dagger.py.
dagger.interact(
    env,
    learner,
    agent,
    observations=[],
    actions=[],
    checkpoint_path="models/DAgger.pth",
    seed=seed,
    num_epochs=100,
)