In [None]:
from dataloader import AtariDataset
import gym
import torch
import numpy as np
import random
import os

def reseed(seed):
  """Seed Python's `random`, NumPy's global RNG and PyTorch with the same value.

  Args:
      seed (int): Seed passed unchanged to each of the three generators.
  """
  # The three generators are independent, so the order of the calls is irrelevant.
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)


# Notebook-wide seed; it is also handed to make_env when the environment is built.
seed = 42
reseed(seed)

def make_env(env_id, seed=25):
    """Create a seeded Gym environment that emits grayscale observations.

    Args:
        env_id (str): Gym environment id handed to ``gym.make``.
        seed (int): Seed applied to the environment, its action space and its
            observation space.

    Returns:
        The environment built with ``obs_type='grayscale'``, ``render_mode=None``,
        ``repeat_action_probability=0.15`` and ``frameskip=1``.
    """
    env = gym.make(
        env_id,
        obs_type='grayscale',
        render_mode=None,
        repeat_action_probability=0.15,
        frameskip=1,
    )
    # Seed the env first, then both spaces, so sampling from any of them is repeatable.
    for seedable in (env, env.action_space, env.observation_space):
        seedable.seed(seed)
    return env


# Shared environment used by the rest of the notebook.
env = make_env("SpaceInvaders-v0", seed=seed)
print(env.action_space.n)
print(env.observation_space.shape)

In [None]:
def visualize(learner, env, video_name="test"):
    """Roll out ``learner`` for one episode in ``env`` and save it as an AVI video.

        Args:
            learner: Policy exposing ``get_action(obs_batch)``. It is called with a
            float tensor holding the current observation plus a leading batch axis.
            env: Gym environment to roll out in. It is reset at the start of the
            call and closed at the end, so it cannot be reused afterwards without
            being re-created.
            video_name (str): Base name of the output file (omit the extension). The
            episode is written to ``{video_name}.avi``.

        Note:
            The frame of the terminal step is not written: the loop exits as soon
            as ``done`` is reported, before rendering.
    """

    import cv2

    print("Visualizing")

    # Resolve the device here instead of reading a notebook-level global, so the
    # function also works when it is called before any cell has defined `device`.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # OpenCV expects the frame size as (width, height).
    video = cv2.VideoWriter(f"{video_name}.avi", fourcc, 24, (160, 210), isColor=True)
    total_reward = 0
    try:
        obs = env.reset()
        done = False
        while not done:
            # Pure inference: no autograd graph is needed for action selection.
            with torch.no_grad():
                action = learner.get_action(torch.Tensor([obs]).to(device))
            obs, reward, done, info = env.step(action)

            total_reward += reward

            if done:
                break

            im = env.render(mode='rgb_array')

            # The renderer returns RGB while OpenCV writes BGR; convert so the
            # colours in the saved video are not channel-swapped.
            video.write(cv2.cvtColor(im, cv2.COLOR_RGB2BGR))
    finally:
        # Always finalize the file and the environment, even if the rollout fails.
        video.release()
        env.close()

    print(f"Video saved as {video_name}.avi")
    print("Reward: " + str(total_reward))

## LOAD ATARI DATA

In [None]:
# Compile the "atari_v1" dataset into five parallel transition arrays.
dataloader = AtariDataset("atari_v1")
observations, actions, rewards, next_observations, dones = dataloader.compile_data()

# Make sure the output directory exists so the saves below cannot fail on a fresh
# checkout after the dataset compile has already been paid for.
os.makedirs('numpy_data/atari', exist_ok=True)

# np.save appends ".npy" to each of these names (e.g. observations.npy).
np.save('numpy_data/atari/observations', observations)
np.save('numpy_data/atari/actions', actions)
np.save('numpy_data/atari/rewards', rewards)
np.save('numpy_data/atari/dones', dones)
np.save('numpy_data/atari/next_observations', next_observations)

In [None]:
# Reload the cached Atari transitions. np.save appended ".npy" when the files were
# written, and np.load does not add the suffix itself, so spell it out here.
atari_obs = np.load('numpy_data/atari/observations.npy')
atari_act = np.load('numpy_data/atari/actions.npy')
atari_rew = np.load('numpy_data/atari/rewards.npy')
# Each variable reads the file matching its name (next observations vs. done flags).
atari_next = np.load('numpy_data/atari/next_observations.npy')
atari_done = np.load('numpy_data/atari/dones.npy')

## DATA COLLECTION

In [None]:
def data_collect(learner, env, num_episodes, save_path):
    """Roll out ``learner`` greedily and save the visited transitions to disk.

    Args:
        learner: Policy exposing ``get_action(obs_batch, eps=...)``; it is called
            with the current observation as a tensor with a leading batch axis
            and ``eps=0.0``.
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_obs, reward, done, info)``.
        num_episodes (int): Number of episodes to roll out.
        save_path (str): Directory that receives ``observations.npy``,
            ``actions.npy``, ``rewards.npy``, ``dones.npy`` and
            ``next_observations.npy``. It is created if it does not exist.
    """
    observations = []
    actions = []
    rewards = []
    next_observations = []
    dones = []
    for _ in range(num_episodes):
        obs = env.reset()
        done = False
        while not done:
            # Pure inference: no autograd graph is needed for action selection.
            with torch.no_grad():
                action = learner.get_action(
                    torch.tensor(obs).unsqueeze(0), eps=0.0
                )  # Greedy action
            next_obs, reward, done, _ = env.step(action)
            observations.append(obs)
            actions.append(action)
            rewards.append(reward)
            next_observations.append(next_obs)
            dones.append(done)
            obs = next_obs

    # Create the target directory up front so a long rollout is not lost to a
    # missing folder at save time.
    os.makedirs(save_path, exist_ok=True)
    # np.save appends ".npy" to each file name.
    np.save(os.path.join(save_path, 'observations'), observations)
    np.save(os.path.join(save_path, 'actions'), actions)
    np.save(os.path.join(save_path, 'rewards'), rewards)
    np.save(os.path.join(save_path, 'dones'), dones)
    np.save(os.path.join(save_path, 'next_observations'), next_observations)

## LOAD BC

In [None]:
from bc import SpaceInvLearner

# Rebuild the behaviour-cloning policy and restore its trained weights.
bc_learner = SpaceInvLearner(env)

# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine as well.
bc_learner.load_state_dict(torch.load('models/bc_learner.pth', map_location='cpu'))

In [None]:
# Roll out the BC policy for 25 episodes and store the transitions under numpy_data/bc.
data_collect(
    learner=bc_learner,
    env=env,
    num_episodes=25,
    save_path='numpy_data/bc',
)

## LOAD DAGGER

In [None]:
# Rebuild the DAgger policy and restore its trained weights.
dagger_learner = SpaceInvLearner(env)

# map_location='cpu' lets a checkpoint that was saved from a GPU run load on a
# CPU-only machine as well.
dagger_learner.load_state_dict(torch.load('models/DAgger.pth', map_location='cpu'))

In [None]:
# Collect the DAgger dataset with the DAgger policy itself; rolling out bc_learner
# here would just write a second copy of BC data into numpy_data/dagger.
data_collect(dagger_learner, env, 25, 'numpy_data/dagger')

## TRAIN DQN w/ ATARI

In [None]:
from dqn import DQN
import dqn

# Network dimensions: one input per pixel of a 210x160 frame, one output per action.
INPUT_SHAPE = 210 * 160
ACTION_SIZE = env.action_space.n

atari_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# Train on the Atari transitions and write the resulting weights to models/atari_dqn.pth.
dqn.train(
    atari_dqn_learner,
    env,
    observations=atari_obs,
    actions=atari_act,
    rewards=atari_rew,
    next_observations=atari_next,
    dones=atari_done,
    save_path='models/atari_dqn.pth',
)

## TRAIN DQN w/ BC

In [None]:
# Load the transitions written by data_collect for the BC policy. np.save appended
# ".npy" to every file name and np.load does not add it, so include the suffix.
bc_obs = np.load('numpy_data/bc/observations.npy')
bc_act = np.load('numpy_data/bc/actions.npy')
bc_rew = np.load('numpy_data/bc/rewards.npy')
bc_done = np.load('numpy_data/bc/dones.npy')
bc_next = np.load('numpy_data/bc/next_observations.npy')

bc_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# Train on the BC transitions and write the resulting weights to models/bc_dqn.pth.
dqn.train(
    bc_dqn_learner,
    env,
    observations=bc_obs,
    actions=bc_act,
    rewards=bc_rew,
    next_observations=bc_next,
    dones=bc_done,
    save_path='models/bc_dqn.pth',
)

## TRAIN DQN w/ DAgger

In [None]:
# Load the transitions stored under numpy_data/dagger. np.save appended ".npy" to
# every file name and np.load does not add it, so include the suffix.
dagger_obs = np.load('numpy_data/dagger/observations.npy')
dagger_act = np.load('numpy_data/dagger/actions.npy')
dagger_rew = np.load('numpy_data/dagger/rewards.npy')
dagger_done = np.load('numpy_data/dagger/dones.npy')
dagger_next = np.load('numpy_data/dagger/next_observations.npy')

dagger_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# Pass each array to its matching keyword: next observations and done flags must
# not be exchanged, or the trainer would bootstrap from the wrong data.
dqn.train(
    dagger_dqn_learner,
    env,
    observations=dagger_obs,
    actions=dagger_act,
    rewards=dagger_rew,
    next_observations=dagger_next,
    dones=dagger_done,
    save_path='models/dagger_dqn.pth',
)

## TEST DQN

In [None]:
# Evaluate the DQN trained on the Atari data: average undiscounted return over
# NUM_EVAL_EPISODES episodes.
NUM_EVAL_EPISODES = 20
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# map_location keeps the load working when the checkpoint was saved on another device.
dqn_learner.load_state_dict(torch.load('models/atari_dqn.pth', map_location=device), strict=True)
# The observations below are moved to `device`, so the network has to live there too.
dqn_learner.to(device)
# Inference mode; a no-op for networks without dropout / batch-norm layers.
dqn_learner.eval()

total_learner_reward = []

for _ in range(NUM_EVAL_EPISODES):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        with torch.no_grad():
            action = dqn_learner.get_action(torch.Tensor([obs]).to(device))
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

# np.mean already divides by the number of episodes; dividing by 20 again would
# under-report the average return by a factor of 20.
print(np.mean(total_learner_reward))