# DO NOT SUBMIT THIS NOTEBOOK

In [4]:
import torch
import gym
import torch.nn as nn
import numpy as np
import random
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def set_seed(env, seed):
    """Seed every random source used by the notebook with the same value.

    Args:
        env: Environment object exposing ``reset(seed=...)``.
        seed: Value handed to PyTorch, NumPy, Python's ``random`` module and
            finally to ``env.reset``.

    Returns:
        None. As a side effect the environment is reset once with ``seed``.
    """
    # Same order as before: torch, then NumPy, then the stdlib generator.
    for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
        seed_rng(seed)
    env.reset(seed=seed)

In [5]:
class DQN(nn.Module):
    """Thin ``nn.Module`` shell whose forward pass delegates to ``self.layers``.

    ``layers`` starts out as ``None``; calling ``forward`` before a callable
    network has been assigned to it will fail.
    """

    def __init__(self):
        super().__init__()
        # Placeholder only: no network is built here.
        self.layers = None

    def forward(self, x):
        """Return the output of ``self.layers`` applied to ``x``."""
        q_values = self.layers(x)
        return q_values
    
class DQNAgent:
    """Agent wrapper around a ``DQN`` action-value network.

    Only ``obtain_action`` carries logic in this notebook; ``get_next_q``,
    ``optimize`` and ``transfer_parameters`` are placeholders returning ``None``.
    """

    def __init__(self):
        # The Q-network lives on the module-level ``device``.
        self.action_value_net = DQN().to(device)

    def obtain_action(self, state, action_space_dim, epsilon):
        """Pick an action epsilon-greedily.

        Args:
            state: NumPy array (it is converted with ``torch.from_numpy``).
                NOTE(review): the arg-max is taken over dim 0, so this assumes
                a single un-batched state -- confirm against callers.
            action_space_dim: Exclusive upper bound for the random action index.
            epsilon: A uniform draw in [0, 1) must exceed this value for the
                greedy action to be kept; otherwise a random index is used.

        Returns:
            A one-element tensor holding the chosen action index, on ``device``.
        """
        state_tensor = torch.from_numpy(state).float().to(device)
        with torch.no_grad():
            cur_q = self.action_value_net(state_tensor)
        _, greedy_action = torch.max(cur_q, dim=0)

        # The uniform draw always happens first; randint is only consumed
        # when the agent explores.
        if torch.rand(1,).item() > epsilon:
            chosen = greedy_action
        else:
            chosen = torch.randint(0, action_space_dim, (1,)).item()
        return torch.tensor([chosen]).to(device)

    def get_next_q(self, state):
        """Placeholder; always returns ``None``."""
        return None

    def optimize(self, batch):
        """Placeholder; always returns ``None``."""
        return None

    def transfer_parameters(self):
        """Placeholder; always returns ``None``."""
        return None

In [6]:
# Test your network here
file_name = '32276117_Practical08_Exercise.pt'
# NOTE(review): this restores a whole pickled agent object (its
# `.action_value_net.layers` and `.seed` attributes are read below), so the
# DQN / DQNAgent classes must be defined before this cell runs. torch.load
# unpickles arbitrary Python objects -- only load files you trust. Recent
# PyTorch releases may additionally require `weights_only=False` here; confirm
# against the installed version.
my_agent = torch.load(file_name, map_location=device)
print(f"For sanity check, this is your network architecture:\n\n{my_agent.action_value_net.layers}")
# LunarLander depends on Box2D; if gym raises DependencyNotInstalled, install
# it with `pip install gym[box2d]`.
my_env = gym.make("LunarLander-v2")
n_trials = 100

# Report the configured number of trials instead of a hard-coded "100" so the
# message cannot drift from `n_trials`.
print(f"\n\nRunning {n_trials} trials ...")

def run_trial(my_env, num_trials, my_agent, action_space_dim=4, epsilon=0.01):
    """Roll out ``my_agent`` for ``num_trials`` episodes and summarise the returns.

    Args:
        my_env: Environment whose ``reset()`` returns a sequence with the
            observation first and whose ``step(action)`` returns
            ``(observation, reward, done, truncated, info)``.
        num_trials: Number of episodes to run.
        my_agent: Object exposing ``seed`` and
            ``obtain_action(observation, action_space_dim, epsilon)``; the
            returned action must support ``.item()``.
        action_space_dim: Number of discrete actions forwarded to the agent.
            Defaults to 4, the value previously hard-coded here.
        epsilon: Exploration rate forwarded to the agent. Defaults to 0.01,
            the value previously hard-coded here.

    Returns:
        ``(mean, std)`` of the per-episode returns, each rounded to 2 decimals.
    """
    list_rewards = []
    # Seed once up front; later resets inside the loop are unseeded.
    set_seed(my_env, my_agent.seed)
    for _ in range(num_trials):
        observation = my_env.reset()[0]
        ep_return = 0
        is_finished = False
        while not is_finished:
            action = my_agent.obtain_action(observation, action_space_dim, epsilon)
            observation, reward, done, truncated, _info = my_env.step(action.item())
            # An episode ends on termination or on truncation.
            is_finished = done or truncated
            ep_return += reward
        list_rewards.append(ep_return)

    return np.round(np.mean(list_rewards), 2), np.round(np.std(list_rewards), 2)

# Evaluate the loaded agent; `mean_return` is the value the recorded grader
# checks compare against their thresholds.
mean_return, std_return = run_trial(my_env=my_env, num_trials=n_trials, my_agent=my_agent)
print(f"Done.\n\nAverage return {mean_return} +- {std_return}")

For sanity check, this is your network architecture:

Sequential(
  (0): Linear(in_features=8, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=4, bias=True)
)


DependencyNotInstalled: box2D is not installed, run `pip install gym[box2d]`

### Grading scheme

| Avg. Return | Marks       |
| ----------- | ----------- |
| < 0         | 1  pt       |
| [0, 50)     | 2  pts      |
| [50, 100)   | 3  pts      |
| [100, 150)  | 4  pts      |
| [150, 200)  | 5  pts      |
| ≥ 200       | 6  pts      |

In [None]:
# Grading cell: build an otter grader pointed at the support tests directory
# and run its checks. The recorded output below reports the `dqn_performance`
# test cases, which compare `mean_return` against score thresholds, so the
# evaluation cell must have run first.
import otter
grader = otter.Notebook(tests_dir = "Practical08_Support/tests")
grader.check_all()

dqn_performance results:
    dqn_performance - 1 message: Your network is outputting something

    dqn_performance - 1 result:
        Test case passed!

    dqn_performance - 2 message: Average return is < 0

    dqn_performance - 2 result:
        Trying:
            mean_return > 0
        Expecting:
            True
        **********************************************************************
        Line 2, in dqn_performance 1
        Failed example:
            mean_return > 0
        Expected:
            True
        Got:
            False

    dqn_performance - 3 message: Average return is < 50

    dqn_performance - 3 result:
        Trying:
            mean_return >= 50
        Expecting:
            True
        **********************************************************************
        Line 2, in dqn_performance 2
        Failed example:
            mean_return >= 50
        Expected:
            True
        Got:
            False

    dqn_performance - 4 message: A

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d30c9d78-b335-4e3c-b9dd-5dae33556494' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>