In [1]:
from collections import deque
import matplotlib.pyplot as plt
import numpy as np
import random
import torch
from unityagents import UnityEnvironment
%matplotlib inline

ModuleNotFoundError: No module named 'torch'

In [None]:
from dqnagent import Agent

In [None]:
# Location of the headless (NoVis) Linux build of the Banana environment.
BANANA_ENV_PATH = "/data/Banana_Linux_NoVis/Banana.x86_64"
env = UnityEnvironment(file_name=BANANA_ENV_PATH)

In [None]:
# Use the first brain exposed by the environment; `brain_name` is the key used
# below to index the results of env.reset() / env.step().
brain_name = env.brain_names[0]
# Brain object for that name; its vector_action_space_size is read later.
brain = env.brains[brain_name]

In [None]:
# Reset once in training mode and collect the sizes the agent needs:
# number of discrete actions and length of the observation vector.
env_info = env.reset(train_mode=True)[brain_name]
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]  # observation of the first agent
state_size = len(state)

print('Number of agents:', num_agents)
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
# Baseline: play a single episode with randomly drawn actions and report the
# accumulated reward.
env_info = env.reset(train_mode=False)[brain_name]
state = env_info.vector_observations[0]
score = 0
done = False
while not done:
    # Draw an action index in [0, action_size) and advance the environment.
    action = np.random.randint(action_size)
    env_info = env.step(action)[brain_name]
    next_state, reward, done = (
        env_info.vector_observations[0],
        env_info.rewards[0],
        env_info.local_done[0],
    )
    score += reward
    state = next_state
print("Score: {}".format(score))

In [None]:
def dqn(n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995, train_mode=True,
        ckpt_path='pth_checkpoints/checkpoint.pth'):
    """Run epsilon-greedy Deep Q-Learning episodes against the Unity environment.

    Uses the notebook-level globals ``env``, ``brain_name`` and ``agent``; they
    must be defined before this function is called.

    Params
    ======
        n_episodes (int): maximum number of episodes to run
        max_t (int): maximum number of time steps per episode
        eps_start (float): initial value of epsilon for action selection
        eps_end (float): lower bound for epsilon
        eps_decay (float): multiplicative factor applied to epsilon after each episode
        train_mode (bool): forwarded to ``env.reset``; the checkpoint is only
            written when this is True
        ckpt_path (str): file the local Q-network's state_dict is saved to once
            the environment is solved

    Returns
    =======
        (scores, moving_avgs): per-episode scores and the moving average over
        the most recent (up to) 100 episodes, one entry per episode played.
    """
    import os  # local import: only needed to create the checkpoint directory

    solved_score = 13.0   # moving average that counts as "solved"
    window_size = 100     # number of consecutive episodes the average is taken over

    scores = []
    scores_window = deque(maxlen=window_size)
    moving_avgs = []
    eps = eps_start
    for i_episode in range(1, n_episodes+1):
        env_info = env.reset(train_mode=train_mode)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            agent.step(state, action, reward, next_state, done)
            # Advance to the new observation; without this the agent would keep
            # acting on (and storing transitions from) the episode's first state.
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)
        scores.append(score)
        moving_avg = np.mean(scores_window)
        moving_avgs.append(moving_avg)
        eps = max(eps_end, eps_decay*eps)
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, moving_avg), end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, moving_avg))
        # Only declare success once the window is full: the average must cover
        # 100 consecutive episodes, which also keeps `i_episode-100` non-negative.
        if len(scores_window) >= window_size and moving_avg >= solved_score:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, moving_avg))
            if train_mode:
                # Make sure the target directory exists so a finished training
                # run is not lost to a missing folder.
                ckpt_dir = os.path.dirname(ckpt_path)
                if ckpt_dir:
                    os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(agent.qnetwork_local.state_dict(), ckpt_path)
            break
    return scores, moving_avgs

In [None]:
# Train an agent with the double-DQN and dueling options switched off, then
# plot the per-episode score together with its moving average.
agent = Agent(state_size=state_size, action_size=action_size, seed=0, use_double=False, use_dueling=False)
scores, avgs = dqn(n_episodes=600, eps_decay=0.98, eps_end=0.02, ckpt_path='pth_checkpoints/v28_checkpoint.pth')

fig, ax = plt.subplots()
episodes = np.arange(len(scores))
ax.plot(episodes, scores, label='DQN+RB+FC64')
ax.plot(episodes, avgs, c='r', label='average')
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Evaluate the saved checkpoint: play `num_episodes` episodes with eps=0 and
# plot the score of each one.
#
# The agent is rebuilt with the same flags as the training cell so that the
# network being loaded into matches the one the checkpoint was saved from.
# NOTE(review): presumably `use_dueling` changes the network layout, in which
# case relying on Agent's defaults here could make load_state_dict fail - confirm
# against dqnagent.Agent.
agent = Agent(state_size=state_size, action_size=action_size, seed=0, use_double=False, use_dueling=False)
checkpoint = 'pth_checkpoints/v28_checkpoint.pth'
agent.qnetwork_local.load_state_dict(torch.load(checkpoint))
num_episodes = 10
scores = []
for i_episode in range(1, num_episodes+1):
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    score = 0
    done = False
    while not done:
        action = agent.act(state, eps=0)
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        score += reward
        state = next_state
    scores.append(score)
    print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores)))

fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [None]:
# Shut the Unity environment down once all experiments are finished.
env.close()