In [None]:
from dataloader import AtariDataset
import gym
import torch
import numpy as np
import random
import os

from dqn import DQN
import dqn

def reseed(seed):
    """Seed torch, the stdlib ``random`` module and NumPy's global RNG with ``seed``."""
    # Same order as before: torch first, then random, then numpy.
    for seeder in (torch.manual_seed, random.seed, np.random.seed):
        seeder(seed)


# Notebook-wide seed; also handed to make_env below.
seed = 42
reseed(seed)

def make_env(env_id, seed=25):
    """Create and seed a Gym environment.

    The environment is built with ``obs_type='grayscale'``, ``render_mode=None``,
    ``repeat_action_probability=0.0`` and ``frameskip=1``. ``seed`` is applied to
    the environment itself and to both its action and observation spaces.

    Args:
        env_id: Gym environment id passed straight to ``gym.make``.
        seed: Integer seed for the environment and its spaces (default 25).

    Returns:
        The seeded environment.
    """
    env = gym.make(
        env_id,
        obs_type='grayscale',
        render_mode=None,
        repeat_action_probability=0.0,
        frameskip=1,
    )
    # Environment first, then its spaces (used when sampling from them).
    for seedable in (env, env.action_space, env.observation_space):
        seedable.seed(seed)
    return env
# Shared Space Invaders environment, seeded with the notebook-wide seed; every
# later cell (data collection, training, evaluation) uses this one instance.
env = make_env("SpaceInvaders-v0", seed=seed)
# Sanity check: size of the action space and shape of a single observation.
print(env.action_space.n)
print(env.observation_space.shape)

# Use the first CUDA GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

## LOAD ATARI DATA

In [None]:
# Open the "atari_v1" dataset. NOTE(review): the meaning of the second argument
# (15) is defined in dataloader.AtariDataset, not here — confirm before changing.
dataloader = AtariDataset("atari_v1", 15)
# compile_data() yields five values that are unpacked as the transition
# components (obs, action, reward, next obs, done) fed to dqn.train below.
atari_obs, atari_act, atari_rew, atari_next, atari_done = dataloader.compile_data()

## DATA COLLECTION

In [None]:
def data_collect(learner, env, num_episodes, save_path, device):
    """Roll out ``learner`` in ``env`` and record every transition.

    Args:
        learner: Policy exposing ``get_action``. ``DQN`` instances are queried
            with ``eps=0.0``; any other learner receives the observation only.
        env: Environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_obs, reward, done, info)``.
        num_episodes: Number of episodes to play until ``done``.
        save_path: Accepted for interface compatibility; this function does
            not read it and writes nothing to disk.
        device: Torch device the observation tensor is moved to before it is
            handed to the learner.

    Returns:
        Five parallel lists ``(observations, actions, rewards,
        next_observations, dones)`` with one entry per environment step.
        Observations and next observations are stored flattened.
    """
    observations, actions, rewards = [], [], []
    next_observations, dones = [], []

    # Only DQN.get_action is given the ``eps`` keyword.
    action_kwargs = {"eps": 0.0} if isinstance(learner, DQN) else {}

    for _ in range(num_episodes):
        obs = env.reset()
        done = False
        while not done:
            with torch.no_grad():
                # Batch of one observation, keeping the array's own dtype.
                # NOTE(review): the evaluation cells pass float32 tensors
                # instead — confirm get_action accepts both.
                obs_batch = torch.tensor(obs).to(device).unsqueeze(0)
                action = learner.get_action(obs_batch, **action_kwargs)
            next_obs, reward, done, _ = env.step(action)

            observations.append(obs.flatten())
            actions.append(action)
            rewards.append(reward)
            next_observations.append(next_obs.flatten())
            dones.append(done)

            obs = next_obs
    return observations, actions, rewards, next_observations, dones

## LOAD BC

In [None]:
from bc import SpaceInvLearner

# Behaviour-cloning policy: build the learner for this environment, then
# restore its saved weights.
bc_learner = SpaceInvLearner(env)

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU host still loads on a CPU-only machine.
bc_learner.load_state_dict(torch.load('models/bc_learner.pth', map_location=device))

In [None]:
# Play 25 episodes with the BC policy and keep the resulting transitions.
# 'numpy_data/bc' is passed as save_path, which data_collect does not use.
bc_obs, bc_act, bc_rew, bc_next, bc_done = data_collect(bc_learner, env, 25, 'numpy_data/bc', device)

## LOAD DAGGER

In [None]:
# DAgger policy: same learner class as BC, restored from its own checkpoint.
dagger_learner = SpaceInvLearner(env)

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU host still loads on a CPU-only machine.
dagger_learner.load_state_dict(torch.load('models/DAgger.pth', map_location=device))

In [None]:
# Play 25 episodes with the DAgger policy and keep the resulting transitions.
# 'numpy_data/dagger' is passed as save_path, which data_collect does not use.
dagger_obs, dagger_act, dagger_rew, dagger_next, dagger_done = data_collect(dagger_learner, env, 25, 'numpy_data/dagger', device)

In [None]:
import matplotlib.pyplot as plt

def plot(values, label):
    """Plot ``values`` against their index and show the figure.

    Args:
        values: Sequence of numbers, one per x position.
        label: Text for the y-axis; the x-axis is always labelled 'Epoch'.
    """
    epochs = np.arange(len(values))
    plt.plot(epochs, values)
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.show()

## TRAIN DQN w/ ATARI

In [None]:
from dqn import DQN
import dqn

# Length of one flattened observation. Derived from the environment instead of
# hard-coding 210*160 so it stays correct if the observation shape changes;
# for the grayscale Space Invaders env created above it is still 210*160.
INPUT_SHAPE = int(np.prod(env.observation_space.shape))
ACTION_SIZE = env.action_space.n

# Q-network trained on the transitions compiled from the Atari dataset.
atari_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# The evaluation section later loads 'models/atari_dqn.pth', so train() is
# presumably what writes it — confirm in dqn.train.
dqn.train(
    atari_dqn_learner,
    env,
    observations=atari_obs,
    actions=atari_act,
    rewards=atari_rew,
    next_observations=atari_next,
    dones=atari_done,
    save_path='models/atari_dqn.pth',
    num_episodes=25,
    lr=1e-6,
)

In [None]:
# Loss history stored on the Atari-trained DQN (its `test_loss` attribute).
plot(atari_dqn_learner.test_loss, 'Loss')

In [None]:
# Score history stored on the Atari-trained DQN (its `test_scores` attribute).
plot(atari_dqn_learner.test_scores, 'Score')

## TRAIN DQN w/ BC

In [None]:
# Fresh Q-network trained on the transitions collected from the BC policy.
bc_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

dqn.train(
    bc_dqn_learner,
    env,
    observations=bc_obs,
    actions=bc_act,
    rewards=bc_rew,
    next_observations=bc_next,
    dones=bc_done,
    save_path='models/bc_dqn.pth',
    num_episodes=25,
    lr=1e-6,
)

In [None]:
# Loss history stored on the BC-data DQN (its `test_loss` attribute).
plot(bc_dqn_learner.test_loss, 'Loss')

In [None]:
# Score history stored on the BC-data DQN (its `test_scores` attribute).
plot(bc_dqn_learner.test_scores, 'Score')

## TRAIN DQN w/ DAgger

In [None]:
# Fresh Q-network trained on the transitions collected from the DAgger policy.
dagger_dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

dqn.train(
    dagger_dqn_learner,
    env,
    observations=dagger_obs,
    actions=dagger_act,
    rewards=dagger_rew,
    next_observations=dagger_next,
    dones=dagger_done,
    save_path='models/dagger_dqn.pth',
    num_episodes=25,
    lr=1e-6,
)

In [None]:
# Loss history stored on the DAgger-data DQN (its `test_loss` attribute).
plot(dagger_dqn_learner.test_loss, 'Loss')

In [None]:
# Score history stored on the DAgger-data DQN (its `test_scores` attribute).
plot(dagger_dqn_learner.test_scores, 'Score')

## TEST DQN

In [None]:
# Collects (label, mean score) tuples; each evaluation cell below appends one.
# Re-run this cell before re-running evaluation cells, otherwise the list
# accumulates duplicate entries.
avg_scores = []

In [None]:
# Evaluate the DQN trained on the Atari dataset: 50 greedy (eps=0.0) episodes,
# mean total reward recorded under the 'AGC DQN' label.
dqn_learner = DQN(INPUT_SHAPE, ACTION_SIZE)

# map_location keeps the checkpoint loadable on hosts that lack the GPU it was
# saved from.
dqn_learner.load_state_dict(torch.load('models/atari_dqn.pth', map_location=device), strict=True)
# NOTE(review): the network itself is not moved to `device` although its
# inputs are — confirm DQN handles device placement internally.

total_learner_reward = []

for episode in range(50):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        # Convert the array directly: torch.Tensor([obs]) goes through the
        # slow list-of-ndarrays path. The result is the same float32 batch of one.
        obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = dqn_learner.get_action(obs_batch, eps=0.0)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

atari_mean = np.mean(total_learner_reward)
avg_scores.append(('AGC DQN', atari_mean))

In [None]:
# Evaluate the DQN trained on BC rollouts: 50 greedy (eps=0.0) episodes.
# Reuses the `dqn_learner` network object created in the Atari evaluation cell
# and overwrites its weights; map_location keeps the checkpoint loadable on
# hosts that lack the GPU it was saved from.
dqn_learner.load_state_dict(torch.load('models/bc_dqn.pth', map_location=device), strict=True)

total_learner_reward = []

for episode in range(50):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        # Convert the array directly: torch.Tensor([obs]) goes through the
        # slow list-of-ndarrays path. The result is the same float32 batch of one.
        obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = dqn_learner.get_action(obs_batch, eps=0.0)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

bc_dqn_mean = np.mean(total_learner_reward)
avg_scores.append(('BC DQN', bc_dqn_mean))

In [None]:
# Evaluate the DQN trained on DAgger rollouts: 50 greedy (eps=0.0) episodes.
# Reuses the `dqn_learner` network object created in the Atari evaluation cell
# and overwrites its weights; map_location keeps the checkpoint loadable on
# hosts that lack the GPU it was saved from.
dqn_learner.load_state_dict(torch.load('models/dagger_dqn.pth', map_location=device), strict=True)

total_learner_reward = []

for episode in range(50):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        # Convert the array directly: torch.Tensor([obs]) goes through the
        # slow list-of-ndarrays path. The result is the same float32 batch of one.
        obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = dqn_learner.get_action(obs_batch, eps=0.0)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

dagger_dqn_mean = np.mean(total_learner_reward)
avg_scores.append(('DAgger DQN', dagger_dqn_mean))

In [None]:
# Evaluate the plain BC policy (no DQN): 50 episodes, mean total reward
# recorded under the 'BC' label.
total_learner_reward = []

for episode in range(50):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        # Convert the array directly: torch.Tensor([obs]) goes through the
        # slow list-of-ndarrays path. The result is the same float32 batch of one.
        obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = bc_learner.get_action(obs_batch)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

bc_mean = np.mean(total_learner_reward)
avg_scores.append(('BC', bc_mean))

In [None]:
# Evaluate the plain DAgger policy (no DQN): 50 episodes, mean total reward
# recorded under the 'DAgger' label.
total_learner_reward = []

for episode in range(50):
    obs = env.reset()
    done = False
    sum_reward = 0
    while not done:
        # Convert the array directly: torch.Tensor([obs]) goes through the
        # slow list-of-ndarrays path. The result is the same float32 batch of one.
        obs_batch = torch.as_tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = dagger_learner.get_action(obs_batch)
        obs, reward, done, info = env.step(action)
        sum_reward += reward
    total_learner_reward.append(sum_reward)

# Stored under its own name: this result used to be written to `bc_mean`,
# silently overwriting the BC policy's score from the previous cell.
dagger_mean = np.mean(total_learner_reward)
avg_scores.append(('DAgger', dagger_mean))

In [None]:
# Split the (label, mean score) entries into parallel lists for plotting.
names, scores = [], []
for entry in avg_scores:
    names.append(entry[0])
    scores.append(entry[1])

# One bar per evaluated model.
plt.bar(names, scores, color='skyblue')

# Axis labels and title so the figure stands on its own.
plt.xlabel('Models')
plt.ylabel('Scores')
plt.title('Average scores over 50 runs')

# Render the figure.
plt.show()

