<a href="https://colab.research.google.com/github/ErinnVdSande/CGT-Project/blob/main/evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing the libraries
code taken from: https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=xh6gb3UWjT3p

In [None]:
%%capture
# ^ hides output
!sudo add-apt-repository -y ppa:openjdk-r/ppa
!sudo apt-get purge openjdk-*
!sudo apt-get install openjdk-8-jdk
!sudo apt-get install xvfb xserver-xephyr vnc4server python-opengl ffmpeg

In [None]:
%%capture

# ^ hides output
# TensorFlow is pinned to the 1.14 series (compatible-release specifier).
!pip3 install tensorflow~=1.14.0

# gast is pinned to 0.2.2; see the linked thread about the TensorFlow "Entity" warning.
# https://stackoverflow.com/questions/57887597/warningtensorflowentity
!pip3 install gast==0.2.2 

# NOTE(review): the packages below are not version-pinned, so a fresh run may
# install different versions than the ones this notebook was developed with.
!pip3 install --upgrade minerl
!pip3 install pyvirtualdisplay
!pip3 install -U colabgymrender
!pip3 install stable-baselines

In [None]:
import gym
import minerl
from tqdm.notebook import tqdm
from colabgymrender.recorder import Recorder
from pyvirtualdisplay import Display
import stable_baselines
import numpy as np
import random 



# Defining wrappers
Wrap the MineRL environment in Gym wrappers so that Stable Baselines can interface with it.

code taken from: https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=xh6gb3UWjT3p

In [None]:
class ExtractPOV(gym.ObservationWrapper):
    """
    Observation wrapper that exposes only the ``'pov'`` entry of the wrapped
    environment's observation, optionally stacking the most recent frames.

    With ``size == 1`` the ``'pov'`` value is returned unchanged. With
    ``size > 1`` the last ``size`` frames are kept (newest first) and
    concatenated along the first axis, which matches the declared observation
    shape ``(64 * size, 64, 3)``.

    Arguments:
        env: The env to wrap. Its observations must be indexable by ``'pov'``.
        size: Number of frames to stack into one observation.
    """
    def __init__(self, env, size: int = 1):
        super().__init__(env)
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(64 * size, 64, 3))
        self.frames = []
        self.size = size

    def reset(self, **kwargs):
        """
        Reset the wrapped env and start a fresh frame history.

        Without clearing ``self.frames`` here, the first stacked observations
        of a new episode would still contain frames from the previous episode.
        """
        self.frames = []
        return super().reset(**kwargs)

    def observation(self, observation):
        """
        :param observation: Observation of the wrapped env (indexable by ``'pov'``)
        :return: The ``'pov'`` frame, or the stack of the last ``size`` frames
        """
        # Minecraft returns shapes in NHWC by default
        pov = observation['pov']
        if self.size == 1:
            return pov
        if self.frames:
            # Newest frame goes to the front; the oldest one is dropped.
            self.frames = [pov] + self.frames[:-1]
        else:
            # First frame of an episode: fill the whole stack with it.
            self.frames = [pov] * self.size
        return np.concatenate(self.frames)
          


In [None]:

class ReversibleActionWrapper(gym.ActionWrapper):
    """
    An ActionWrapper that can also translate actions in the opposite direction:
    from the action schema of the innermost env into the action schema of the
    fully wrapped env.

    This is primarily intended for converting a dataset of actions stored in the
    action schema of the internal env into the schema produced by the applied
    set of ActionWrappers, so that an imitation model trained on such a dataset
    transfers directly to the wrapped environment for rollouts, RL, etc.

    Mechanically, this assumes that every such wrapper implements
    `reverse_action`, and recursively calls all `reverse_action` methods from
    the inside out.

    As an example:
        > wrapped_env = C(B(A(env)))
    If A, B and C are all action wrappers, passing an action to
    wrapped_env.step() applies the `action` transformations from outside in:
        > env.step(A.action(B.action(C.action(act))))

    `wrap_action` performs the reverse operation and returns:
        > C.reverse_action(B.reverse_action(A.reverse_action(inner_action)))

    To do this, `wrap_action` recurses while the wrapped env itself provides
    `wrap_action`. At the base case (the next env has no `wrap_action`) it
    returns self.reverse_action(inner_action); every outer wrapper then applies
    its own `reverse_action` on the way out of the recursion.
    """
    def wrap_action(self, inner_action):
        """
        :param inner_action: An action in the format of the innermost env's action_space
        :return: An action in the format of the action space of the fully wrapped env
        """
        if hasattr(self.env, 'wrap_action'):
            # Let the inner wrappers translate first, then apply our own step.
            return self.reverse_action(self.env.wrap_action(inner_action))
        return self.reverse_action(inner_action)

    def reverse_action(self, action):
        """
        Inverse of this wrapper's `action` transformation; subclasses must override it.

        :raises NotImplementedError: always, in this base class
        """
        # The trailing space keeps the two concatenated literals from running together.
        raise NotImplementedError("In order to use a ReversibleActionWrapper, you need to implement a `reverse_action` function "
                                  "that is the inverse of the transformation performed on an action that comes into the wrapper")
        
class ActionShaping(ReversibleActionWrapper):
    """
    Replaces the wrapped env's dict action space with a small discrete one.

    Discrete index -> action sent to the wrapped env:
        0: attack
        1: forward
        2: forward + jump
        3: camera [-camera_angle, 0]
        4: camera [+camera_angle, 0]
        5: camera [0, +camera_angle]
        6: camera [0, -camera_angle]
        7: no-op (always the last index, i.e. ``len(self.actions)``)
    """
    def __init__(
        self,
        env: gym.Env,
        camera_angle: int = 10,
        always_attack: bool = False,
        camera_margin: int = 5,
    ):
        """
        Arguments:
            env: The env to wrap.
            camera_angle: Discretized actions will tilt the camera by this number of
                degrees.
            always_attack: If True, then always send attack=1 to the wrapped environment
                for the shaped actions (the no-op action is left untouched).
            camera_margin: Used by self.reverse_action. If a camera component of a
                dataset action is strictly beyond `camera_margin` (in either
                direction), the dataset action is discretized as a camera-change action.
        """
        super().__init__(env)

        self.camera_angle = camera_angle
        self.camera_margin = camera_margin
        self.always_attack = always_attack
        self._actions = [
            [('attack', 1)],
            [('forward', 1)],
            [('forward', 1), ('jump', 1)],
            [('camera', [-self.camera_angle, 0])],
            [('camera', [self.camera_angle, 0])],
            [('camera', [0, self.camera_angle])],
            [('camera', [0, -self.camera_angle])],
        ]

        # Expand every (key, value) list into a full action dict based on a no-op.
        self.actions = []
        for actions in self._actions:
            act = self.env.action_space.noop()
            for a, v in actions:
                act[a] = v
            if self.always_attack:
                act['attack'] = 1
            self.actions.append(act)

        # One extra index beyond the shaped actions is reserved for the no-op.
        self.action_space = gym.spaces.Discrete(len(self.actions) + 1)

    def action(self, action):
        """
        :param action: Index into the discrete action space
        :return: The corresponding action dict for the wrapped env
        """
        # The no-op index is derived from the table so it cannot drift out of
        # sync with `self._actions`.
        if action == len(self.actions):
            return self.env.action_space.noop()
        return self.actions[action]

    def reverse_action(self, action: dict) -> np.ndarray:
        """
        Discretize a batch of dict actions from the wrapped env's schema.

        :param action: Dict with "camera", "attack", "forward" and "jump" entries.
            Entries are flattened to one row per sample, so a batch of size one
            (or a single unbatched action) is handled as well.
        :return: Integer array with one discrete action index per sample
        """
        # reshape (instead of squeeze) keeps the batch axis even when it has length 1.
        camera = np.asarray(action["camera"]).reshape(-1, 2)
        attack = np.asarray(action["attack"]).reshape(-1)
        forward = np.asarray(action["forward"]).reshape(-1)
        jump = np.asarray(action["jump"]).reshape(-1)
        margin = self.camera_margin

        # np.select takes the FIRST matching condition, so the order below is the
        # priority: camera component 0, camera component 1, movement, attack.
        # NOTE(review): the original comment called component 0 "horizontal";
        # confirm which axis index 0 is in the env's camera convention.
        conditions = [
            camera[:, 0] < -margin,
            camera[:, 0] > margin,
            camera[:, 1] > margin,
            camera[:, 1] < -margin,
            (forward == 1) & (jump == 1),
            forward == 1,
            attack == 1,
        ]
        indices = [3, 4, 5, 6, 2, 1, 0]

        # No reasonable mapping -> no-op index.
        return np.select(conditions, indices, default=len(self.actions)).astype(int)

In [None]:
class DenserEnvironment(gym.Wrapper):
    """
    Reward-shaping wrapper that makes the reward signal denser.

    After every step, four neighbouring pixels of the observation (rows 30-31,
    columns 30-31) are compared against two palettes: each pixel found in
    `good` adds 0.01 to the reward, each pixel found in `bad` subtracts 0.005.
    If the action equals 0, the resulting reward is doubled.

    Arguments:
        env: The env to wrap; its observations must be indexable as obs[row][col].
        good: Iterable of pixel values that are rewarded.
        bad: Iterable of pixel values that are penalized.
    """
    def __init__(self, env, good, bad):
        super().__init__(env)
        self.good_pixels = good
        self.bad_pixels = bad

    @staticmethod
    def _pixel_in(pixel, palette):
        """
        Return True if `pixel` equals any entry of `palette` as a whole pixel.

        A plain `pixel in palette` is not usable when `pixel` is a NumPy array:
        against a list it raises on the ambiguous truth value of the
        element-wise comparison, and against an ndarray it is True as soon as
        any single channel matches.
        """
        return any(np.array_equal(pixel, candidate) for candidate in palette)

    def step(self, action):
        """
        :param action: Action forwarded unchanged to the wrapped env
        :return: (obs, shaped reward, done, info)
        """
        obs, og_reward, done, info = self.env.step(action)
        reward = og_reward
        # 2x2 patch: left-top, right-top, left-bottom, right-bottom.
        for row, col in ((30, 30), (30, 31), (31, 30), (31, 31)):
            pixel = obs[row][col]
            if self._pixel_in(pixel, self.good_pixels):
                reward += 0.01
            if self._pixel_in(pixel, self.bad_pixels):
                reward -= 0.005
        # NOTE(review): presumably index 0 is the attack action of ActionShaping;
        # note that the doubling also applies when the shaped reward is negative.
        if action == 0:
            reward *= 2
        return obs, reward, done, info

# Generating a video from the policy
code taken from: https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=W_o_mCsADWCV

In [None]:
def video_from_policy(env, policy, max_steps=None):
    """
    Roll out `policy` in `env` for one episode while recording it, then play
    the recorded video.

    Arguments:
        env: Environment to roll out in. It is wrapped in a `Recorder` that
            writes to './video' at 60 fps.
        policy: Object whose `predict(obs)` returns an `(action, state)` pair.
        max_steps: Optional cap on the number of environment steps. `None`
            runs until the env reports `done`.
    """
    env = Recorder(env, './video', fps=60)
    # NOTE(review): this virtual display is started but never stopped; it is
    # left running on purpose here because the env may be attached to it — confirm.
    display = Display(visible=0, size=(800, 600))
    display.start()

    obs = env.reset()
    done = False
    steps_taken = 0
    try:
        # `<` caps the rollout at exactly max_steps steps (the previous
        # post-step `>` check allowed one step too many).
        while not done and (max_steps is None or steps_taken < max_steps):
            action, _ = policy.predict(obs)
            obs, _, done, _ = env.step(action)
            steps_taken += 1
    finally:
        # Finalize the recording even if the rollout raised.
        env.release()
    env.play()

# Save intermediate model in Google Drive
code based on: https://linuxtut.com/en/4bc6d6174bb3e8461c6d/

In [None]:
# connecting to Google Drive
import os

from google.colab import drive
drive.mount('/content/drive')
# Directory the trained checkpoints are loaded from.
MODEL_DIR = "/content/drive/My Drive/RL-backups"
# Directory the evaluation results are written to.
EVALUATION_DIR = "/content/drive/My Drive/RL-evaluation"
# exist_ok=True creates a directory only when it is missing, without a
# separate check-then-create step.
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(EVALUATION_DIR, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Setting up the environment
code taken from: https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=W_o_mCsADWCV


In [None]:
# Create the base environment registered under the id "MineRLTreechop-v0".
minerl_env = gym.make("MineRLTreechop-v0")

In [None]:
# Keep only the 'pov' observation and stack the 4 most recent frames.
obs_wrapped_treechop = ExtractPOV(minerl_env,4)

In [None]:
# Discretize the action space using ActionShaping's default settings.
obs_action_wrapped_treechop = ActionShaping(obs_wrapped_treechop)

In [None]:
from stable_baselines import DQN

# Start a headless (visible=0) 400x300 virtual display.
display = Display(visible=0, size=(400, 300))
display.start()

<pyvirtualdisplay.display.Display at 0x7fdf3c8638d0>

In [None]:
import pickle
import os

def save_to_pickle(path,name,obj):
    """
    Pickle `obj` to the file `<path>/<name>.pkl`, creating `path` if needed.

    Arguments:
        path: Directory to write into; missing directories are created.
        name: File name without the ".pkl" extension.
        obj: Any picklable object.
    """
    # exist_ok=True avoids the separate check-then-create step.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, name + ".pkl"), "wb") as write_handle:
        pickle.dump(obj, write_handle)

def open_pickle(file_path):
    """
    Load and return the object stored in the pickle file at `file_path`.

    Only use this on files written by this notebook: unpickling untrusted data
    can execute arbitrary code.
    """
    with open(file_path, mode="rb") as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded


In [None]:
def _run_episode(policy, env, max_steps=None):
    """Play one episode of `env` with `policy` (at most `max_steps` steps) and return the summed reward."""
    # [ code based on https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=W_o_mCsADWCV
    obs = env.reset()
    done = False
    total_reward = 0
    steps_taken = 0
    # `<` caps the episode at exactly max_steps steps (the previous post-step
    # `>` check allowed one step too many).
    while not done and (max_steps is None or steps_taken < max_steps):
        action, _ = policy.predict(obs)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        steps_taken += 1
    # code based on https://colab.research.google.com/drive/13_jI8YLk9ATRQSd7_3rV5rOsll7jsSz0#scrollTo=W_o_mCsADWCV ]
    return total_reward


def evaluate_model(batch_path,env,max_steps=1500,steps=100,step_size=1000,test_runs=1):
    """
    Evaluate a series of saved DQN checkpoints on `env`.

    For k = 1..steps the checkpoint
    `MODEL_DIR + batch_path + "/rl_model_{k * step_size}_steps"` is loaded and
    played for `test_runs` episodes. After each checkpoint its results are
    pickled to `EVALUATION_DIR + batch_path + "/{k * step_size}.pkl"`, so
    partial results survive an interrupted run.

    Arguments:
        batch_path: Sub-path (starting with "/") appended to MODEL_DIR and EVALUATION_DIR.
        env: Environment used both for loading the model and for the rollouts.
        max_steps: Cap on the steps per episode; `None` runs until done.
        steps: Number of checkpoints to evaluate.
        step_size: Training steps between two consecutive checkpoints.
        test_runs: Episodes played per checkpoint.

    Returns:
        (avg_rewards, variances, rewards): arrays of shape (steps,), (steps,)
        and (steps, test_runs) with the mean, variance and individual episode
        rewards per checkpoint.
    """
    avg_rewards = np.zeros(steps)
    variances = np.zeros(steps)
    # Zero-initialised: np.ndarray(...) would hand back uninitialised memory.
    rewards = np.zeros((steps, test_runs))
    pickle_path = f"{EVALUATION_DIR}{batch_path}"

    for training_batch in range(1, steps + 1):
        row = training_batch - 1
        trained_steps = training_batch * step_size

        # load the model
        policy = DQN.load(MODEL_DIR + batch_path + f"/rl_model_{trained_steps}_steps", env)

        current_rewards = np.zeros(test_runs)
        for run_idx in range(test_runs):
            current_rewards[run_idx] = _run_episode(policy, env, max_steps)

        avg_rewards[row] = np.mean(current_rewards)
        rewards[row] = current_rewards
        variances[row] = np.var(current_rewards)

        # Persist this checkpoint's results immediately.
        current_data = dict(avg_rewards=avg_rewards[row],
                            variances=variances[row],
                            rewards=rewards[row])
        save_to_pickle(pickle_path, f"{trained_steps}", current_data)

    return avg_rewards, variances, rewards

In [None]:
# Evaluate the checkpoints stored under MODEL_DIR + "/300.000" with the default settings.
avg_rewards, variances, rewards = evaluate_model("/300.000",obs_action_wrapped_treechop)
# Bundle the aggregated results and store them as "300_000.pkl" in EVALUATION_DIR.
data_to_save = dict(avg_rewards=avg_rewards,variances=variances,rewards=rewards)
save_to_pickle(EVALUATION_DIR,"300_000",data_to_save)







Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




KeyboardInterrupt: ignored