# In this notebook we check what the agent has learnt

In [None]:
import gym
import torch
from collections import deque
import numpy as np
import matplotlib.pyplot as plt

from config import *
from agent import *
from utils import *

## 1) Skiing

### 1) rewards

In [None]:
# Load and plot the raw reward log recorded for the Skiing agent.
# This cell used to read the LunarLander log from a './skiing_result/'
# directory (singular), so the Skiing section showed the wrong curve or
# failed with FileNotFoundError.
# NOTE(review): file name follows the './skiing_results/' naming used
# elsewhere in this notebook -- confirm it matches the training output.
rewards = np.load('./skiing_results/Skiing-ramDeterministic-v4_rewards.npy')
plt.plot(rewards)
plt.show()

In [None]:
# Smooth the reward curve with a trailing mean over the last `window` entries.
# The upper bound is len(rewards) + 1 so the final window -- the one ending
# at the last recorded reward -- is part of the plot; the previous bound of
# len(rewards) silently dropped it.
window = 100
average = [
    np.mean(rewards[end - window:end])
    for end in range(window, len(rewards) + 1)
]
plt.plot(average)
plt.show()

### 2) watch the show

In [None]:
# Build the Skiing environment and an agent whose input/output sizes are
# taken from the environment's observation and action spaces.
env = gym.make("Skiing-ramDeterministic-v4")
agent = Agent(env.observation_space.shape[0], env.action_space.n, BATCH_SIZE, LEARNING_RATE, TAU, GAMMA, DEVICE, False)

# Restore the trained Q-network parameters.
# This previously passed the '*_rewards.npy' reward log to torch.load, which
# is not a state dict and cannot be consumed by load_state_dict.
# NOTE(review): the weights file name is inferred from the LunarLander
# convention ('<env>..._weights.pth') -- confirm against the training script.
agent.Q_local.load_state_dict(torch.load('./skiing_results/Skiing-ramDeterministic-v4_weights.pth'))

rewards_log = []   # total reward collected in each rendered episode
eps = EPS_MIN      # value handed to agent.act as its second argument
num_episode = 3    # number of episodes to render
max_t = 1000       # hard cap on steps per episode

for i in range(1, 1 + num_episode):

    episodic_reward = 0
    done = False
    state = env.reset()
    t = 0

    # Run until the environment reports termination or the step cap is hit.
    while not done and t < max_t:

        env.render()
        t += 1
        action = agent.act(state, eps)
        next_state, reward, done, _ = env.step(action)
        # Copy so `state` does not alias the array returned by env.step.
        state = next_state.copy()
        episodic_reward += reward

    rewards_log.append(episodic_reward)

# Last expression of the cell: the notebook displays the per-episode totals.
rewards_log

In [None]:
# Close the environment held in `env` once the rollout cell has finished.
# NOTE(review): presumably this also tears down the render window -- confirm.
env.close()

## 2) LunarLander

### 1) rewards

In [None]:
# Read the stored LunarLander reward log from disk and draw it as-is.
rewards_file = './lunarlander_results/LunarLander-v2_r5_rewards.npy'
rewards = np.load(rewards_file)

plt.plot(rewards)
plt.show()

In [None]:
# Smooth the reward curve with a trailing mean over the last `window` entries.
# Iterating up to len(rewards) + 1 keeps the final window (ending at the
# last recorded reward); stopping at len(rewards) left that point out.
window = 100
average = [
    np.mean(rewards[end - window:end])
    for end in range(window, len(rewards) + 1)
]
plt.plot(average)
plt.show()

### 2) watch the show

In [None]:
# Set up the LunarLander environment and an agent sized from its spaces.
env = gym.make("LunarLander-v2")
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = Agent(state_size, action_size, BATCH_SIZE, LEARNING_RATE, TAU, GAMMA, DEVICE, False)

# Restore the saved parameters into the agent's local Q-network.
saved_weights = torch.load('./lunarlander_results/LunarLander-v2_r5_weights.pth')
agent.Q_local.load_state_dict(saved_weights)

num_episode = 3    # episodes to render
max_t = 1000       # step budget per episode
eps = EPS_MIN      # second argument passed to agent.act
rewards_log = []   # one total per episode

for i in range(1, num_episode + 1):

    state = env.reset()
    episodic_reward = 0
    done = False

    # Step counter runs 1..max_t; leave early once the episode ends.
    for t in range(1, max_t + 1):
        env.render()
        action = agent.act(state, eps)
        next_state, reward, done, _ = env.step(action)
        state = next_state.copy()
        episodic_reward += reward
        if done:
            break

    rewards_log.append(episodic_reward)

# Displayed by the notebook as the cell's output.
rewards_log

In [None]:
# Close the environment held in `env` once the rollout cell has finished.
# NOTE(review): presumably this also tears down the render window -- confirm.
env.close()