## DDQN in Hockey Environment - Evaluation

Evaluate a trained DDQN agent in the hockey environment against the weak and strong basic opponents.

Base DQN implementation adapted from HW7

In [19]:
from importlib import reload

import numpy as np
from tqdm.notebook import tqdm

import DDQN.DDQN as ddqn
from DDQN.DDQN import DDQNAgent
from DDQN.evaluation import compare_agents, display_stats
import hockey.hockey_env as h_env

# Pick up on-disk edits to the project modules without restarting the kernel.
reload(h_env)
reload(ddqn)

# The earlier `from DDQN.DDQN import DDQNAgent` bound the class object that
# existed *before* the reload. importlib.reload() does not update names that
# were imported with `from ... import ...`, so rebind the name to the class
# defined by the freshly reloaded module.
DDQNAgent = ddqn.DDQNAgent

<module 'DDQN.DDQN' from '/home/kivanc/ders/RL/project/RL-Hockey/DDQN/DDQN.py'>

In [20]:
def running_mean(x, N):
    """Return the moving average of ``x`` over a sliding window of ``N`` samples.

    The average is computed from a zero-prefixed cumulative sum, so each
    output entry costs one subtraction regardless of the window size.

    Args:
        x: Sequence or array of numbers. Multi-dimensional input is flattened,
            because ``np.insert`` and ``np.cumsum`` are called without an axis.
        N: Window length; must be at least 1.

    Returns:
        A float array with ``max(len(x) - N + 1, 0)`` entries, where entry
        ``i`` is the mean of ``x[i:i + N]``. It is empty when the window is
        longer than the input.

    Raises:
        ValueError: If ``N`` is smaller than 1. A negative window would
            otherwise slice the cumulative sum from the wrong end and return
            meaningless values without any error.
    """
    if N < 1:
        raise ValueError(f"window size N must be a positive integer, got {N}")
    # Prepend a zero so that cumsum[i + N] - cumsum[i] is the sum of x[i:i + N].
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)

## Environment & Agent Initialization

In [21]:
# Create the hockey environment in NORMAL mode; this one instance is shared by
# the agent setup, the evaluation runs and the rendered demonstration below.
env = h_env.HockeyEnv(mode=h_env.Mode.NORMAL)
# Reset once. As the last expression of the cell, the returned
# (observation, info) pair is echoed as the cell output.
env.reset()

(array([-3.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  3.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.98570633, -0.27267766,  0.        ,
         0.        ,  0.        ,  0.        ]),
 {'winner': 0,
  'reward_closeness_to_puck': 0.0,
  'reward_touch_puck': 0.0,
  'reward_puck_direction': 0.0})

In [22]:
# Directory holding the saved state of the trained agent.
STATE_PATH = "./DDQN/models/hockey_mixed_dev/"

# Build the DDQN player on the environment's observation space and discrete
# action space, then restore its saved state.
agent_player = DDQNAgent(env.observation_space, env.discrete_action_space)
agent_player.load_state(STATE_PATH)

# Scripted opponents shipped with the environment: weak first, then strong.
agent_opp_weak, agent_opp_strong = [
    h_env.BasicOpponent(weak=is_weak) for is_weak in (True, False)
]

## Evaluation

### Winning Rates Against Opponents & Match Statistics

In [23]:
# Play 1000 matches against each scripted opponent (weak first, then strong)
# on the shared environment and keep the statistics of each run.
weak_opp_stats, strong_opp_stats = [
    compare_agents(agent_player, opponent, env, num_matches=1000, tqdm=tqdm)
    for opponent in (agent_opp_weak, agent_opp_strong)
]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

In [24]:
# Report the statistics gathered by compare_agents, one labelled section per
# opponent: weak opponent first, strong opponent second.
print("Weak Opponent Stats:")
display_stats(weak_opp_stats)

print("Strong Opponent Stats:")
display_stats(strong_opp_stats)

Weak Opponent Stats:
Player Observation Mean:
  x pos player one: -2.0193157010233556
  y pos player one: 0.4608703811093886
  angle player one: 0.08931009992165054
  x vel player one: 0.47304561207102586
  y vel player one: 0.34364539633539326
  angular vel player one: 0.03850416311502162
  x player two: 2.7995687399482616
  y player two: 0.005574076569847045
  angle player two: -0.00616579631601391
  y vel player two: 0.02789970308117944
  y vel player two: -0.001102856617999194
  angular vel player two: 0.0010214674983274443
  x pos puck: -0.022926518486426666
  y pos puck: 0.05001918626254128
  x vel puck: -0.8886079000628004
  y vel puck: -0.0145039224842853
  left player puck keep time: 0.8623762376237624
  right player puck keep time: 0.8264414676761794

Relative Std. Change in Agent Observations:
  x pos player one: 0.15442928997866898
  y pos player one: 0.5212089896134883
  angle player one: 0.7720337870335996
  x vel player one: -0.2516995590713395
  y vel player one: 0.5401

### Rendered Demonstration Against Strong Opponent

In [26]:
# Play a single rendered episode against the strong opponent, recording every
# observation and reward returned to the player.
obs_buffer, reward_buffer = [], []
obs, _ = env.reset()
# Observation used by the opponent (presumably the second player's view of
# the state; confirm against HockeyEnv.obs_agent_two).
obs_opp = env.obs_agent_two()

done = trunc = False
step = 0
while not done and not trunc:
    step += 1
    env.render()

    # The DDQN agent returns a discrete action, which the environment
    # converts to its continuous form; the scripted opponent acts directly.
    a1_discr = agent_player.act(obs)
    a1 = env.discrete_to_continous_action(a1_discr)
    a2 = agent_opp_strong.act(obs_opp)

    # The environment is stepped with both players' actions stacked together.
    obs, r, done, trunc, _ = env.step(np.hstack([a1, a2]))
    obs_buffer.append(obs)
    reward_buffer.append(r)

    obs_opp = env.obs_agent_two()

# The loop only exits once the episode has terminated or been truncated.
print(f"Episode done in {step} steps")

Episode done in 116 steps


In [27]:
# Close the environment now that the rendered demonstration has finished.
# NOTE(review): presumably this also tears down the render window; confirm
# against HockeyEnv.close().
env.close()