In [None]:
!pip install imageio > /dev/null 2>&1

In [None]:
import shutil
from pathlib import Path

from google.colab import drive

# Google Drive locations of the project sources and of the saved model
PATH_TO_ENVIRONMENT = '/content/drive/MyDrive/Master_Thesis/Final/Predator-Prey_Environment'
PATH_TO_ALGORITHMS = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm'
PATH_TO_SAVE_MODEL = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm/Predator.pth'

drive.mount('/content/drive')

# Copy the project sources next to the notebook so that the import cell can
# find them. Pure-Python copies are used instead of `!cp` so that paths with
# spaces work and a missing source raises here, not later as an ImportError.
CONTENT_DIR = Path('/content')
ALGORITHM_FILES = ('colab_helpers.py', 'A2C.py')
ENVIRONMENT_PACKAGES = ('utils', 'render_utils', 'entities', 'colliders')

for file_name in ALGORITHM_FILES:
  shutil.copy(Path(PATH_TO_ALGORITHMS) / file_name, CONTENT_DIR)

for package_name in ENVIRONMENT_PACKAGES:
  # dirs_exist_ok=True lets the cell be re-run: like `cp -r`, it copies into
  # a directory that already exists
  shutil.copytree(Path(PATH_TO_ENVIRONMENT) / package_name,
                  CONTENT_DIR / package_name,
                  dirs_exist_ok=True)

shutil.copy(Path(PATH_TO_ENVIRONMENT) / 'environment.py', CONTENT_DIR)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from A2C import AgentA2C
from colab_helpers import save_video, show_video
from environment import Environment

In [None]:
# Directory in which generated videos are written
VIDEO_FILE_PATH = "."

# Render mode passed to the environment constructor
RENDER_MODE = "rgb_array"

# Number of episodes executed by each experiment loop in this notebook
TOTAL_EPISODES = 2_500

# Maximum number of time steps in a single episode
TIME_STEPS = 2_000

In [None]:
# Settings of the experiment environment: no plants, 100 initial preys and a
# single initial predator, with reproduction and the preys' hunger/thirst
# decay switched off and the predator's collision outline rendered.
environment_settings = dict(
  render_mode=RENDER_MODE,
  plants_count=0,
  preys_initial_count=100,
  prey_reproduction=False,
  prey_hunger_decay=False,
  prey_thirst_decay=False,
  predators_initial_count=1,
  predator_reproduction=False,
  predator_render_collision_outline=True,
)

env = Environment(**environment_settings)

In [None]:
# Number of discrete actions available to a predator / to a prey; later
# cells use them as the exclusive upper bound when sampling random actions.
action_space = 4
action_space_prey = 3

# Passed to the agent constructor as the observation size.
# NOTE(review): presumably the length of what env.obs_predator() returns -
# confirm against the environment.
observation_space = 17

# Smoke test: reset, show every predator's observation, advance one step
env.reset()

for hunter in env.get_predators():
  print(env.obs_predator(hunter))

env.step()

In [None]:
# Roll out one episode in which preys and predators take uniformly random
# actions, record every rendered frame and show the result as a video.
video_path = f'{VIDEO_FILE_PATH}/Test of environment'
frames = []
env.reset()

for time_step in range(TIME_STEPS):
  done = False

  for prey in env.get_preys():
    env.dummy_step_prey(prey, np.random.randint(0, action_space_prey))

  for predator in env.get_predators():
    env.step_predator(predator, np.random.randint(0, action_space))

    # The episode ends as soon as a predator is no longer alive
    if not predator.is_alive():
      done = True

  env.step()

  # The frame of the final step is recorded before the loop is left
  frames.append(env.render())

  if done:
    break

save_video(frames, video_path)
# show_video is given the same path with an '.mp4' suffix
show_video(f'{video_path}.mp4')

In [None]:
# Random-policy baseline: run TOTAL_EPISODES episodes in which every prey and
# every predator takes uniformly random actions, and record the total
# predator reward of each episode in all_rewards.
all_rewards = []

for episode in range(TOTAL_EPISODES):
  env.reset()

  # Zero the preys' private decay attributes after every reset.
  # NOTE(review): presumably redundant with prey_hunger_decay=False /
  # prey_thirst_decay=False passed to Environment - confirm.
  for prey in env.get_preys():
    prey._hunger_decay = 0
    prey._thirst_decay = 0

  total_reward = 0

  for time_step in range(TIME_STEPS):
    done = False

    for prey in env.get_preys():
      env.dummy_step_prey(prey, np.random.randint(0, action_space_prey))

    for predator in env.get_predators():
      _, reward, predator_done = env.step_predator(predator, np.random.randint(0, action_space))

      # Accumulate inside the loop so that every predator's reward is counted
      # and nothing is read from a loop that may not have run at all
      total_reward += reward

      if predator_done or not predator.is_alive():
        done = True

    env.step()

    # Stop on a terminal step or when the time limit is reached
    if done or time_step == TIME_STEPS - 1:
      print(f"\rEpisode: {episode}, Reward: {total_reward}, Time steps: {time_step}", end="")
      break

  all_rewards.append(total_reward)

average_reward = sum(all_rewards) / TOTAL_EPISODES
# Finish the '\r' progress line, then report the mean like the training cell
print()
print("Average reward:", average_reward)

In [None]:
# Plot the reward of every episode, its moving average and the overall mean.

# Clamp the window to the number of recorded episodes: with fewer episodes
# than the window, 'valid' convolution returns an array whose length no
# longer matches the x range below and plt.plot raises.
window_size = min(100, len(all_rewards))
moving_average = np.convolve(all_rewards, np.ones(window_size)/window_size, mode='valid')

# Compute the mean from the data being plotted, so the horizontal line can
# never come from a different (e.g. earlier or interrupted) run
mean_reward = sum(all_rewards) / len(all_rewards)

plt.figure(figsize=(12, 6))
plt.plot(all_rewards, label='Nagroda na epizod')
# The first moving-average value belongs to episode index window_size - 1
plt.plot(range(window_size-1, len(all_rewards)), moving_average, color='orange', linestyle='-', label=f'Średnia krocząca (rozmiar okna={window_size})')
plt.axhline(mean_reward, color='r', linestyle='--', label=f'Średnia nagroda: {mean_reward:.2f}')
plt.xlabel('Epizod', fontsize=16)
plt.ylabel('Całkowita nagroda', fontsize=16)
plt.legend(loc='upper right')
plt.show()

In [None]:
# Create the A2C agent that the training cell drives; action_space and
# observation_space are the sizes defined in the environment smoke-test cell.
# NOTE(review): units / dropout are presumably the hidden-layer width and the
# dropout rate of the agent's networks - confirm in A2C.py.
agent = AgentA2C(action_space, observation_space, units=256, dropout=0.25)

In [None]:
# Train the A2C agent: preys take random actions, the predator is driven by
# the agent, and the agent learns once at the end of every episode.
all_rewards = []

for episode in range(TOTAL_EPISODES):
  env.reset()

  # Initial observation for the episode (with several predators the last
  # one wins - the loop assumes a single predator)
  for predator in env.get_predators():
    obs = env.obs_predator(predator)

  total_reward = 0

  for time_step in range(TIME_STEPS):
    done = False

    for prey in env.get_preys():
      env.dummy_step_prey(prey, np.random.randint(0, action_space_prey))

    for predator in env.get_predators():
      action = agent.act(obs)
      next_obs, reward, predator_done = env.step_predator(predator, action)
      agent.remember(obs, action, reward, predator_done)
      # NOTE(review): this observation is taken before env.step() and before
      # the preys move again, so the next action may be chosen from a stale
      # view of the world - confirm against the environment.
      obs = next_obs

      # Accumulate inside the loop so that nothing is read from a loop that
      # may not have run at all
      total_reward += reward
      if predator_done:
        done = True

    env.step()

    # Learn at the end of the episode: terminal step or time limit
    if done or time_step == TIME_STEPS - 1:
      print(f"\rEpisode: {episode}, Reward: {total_reward}, Time steps: {time_step}", end="")
      # obs holds the observation returned by the last predator step
      agent.get_last_state(obs)
      agent.learn()
      break

  all_rewards.append(total_reward)

agent.save(PATH_TO_SAVE_MODEL)
average_reward = sum(all_rewards) / TOTAL_EPISODES
print()
print("Average reward:", average_reward)

In [None]:
# Plot the reward of every episode, its moving average and the overall mean.

# Clamp the window to the number of recorded episodes: with fewer episodes
# than the window, 'valid' convolution returns an array whose length no
# longer matches the x range below and plt.plot raises.
window_size = min(100, len(all_rewards))
moving_average = np.convolve(all_rewards, np.ones(window_size)/window_size, mode='valid')

# Compute the mean from the data being plotted: if the loop that fills
# all_rewards was interrupted, average_reward would still hold the value of
# an earlier run
mean_reward = sum(all_rewards) / len(all_rewards)

plt.figure(figsize=(12, 6))
plt.plot(all_rewards, label='Nagroda na epizod')
# The first moving-average value belongs to episode index window_size - 1
plt.plot(range(window_size-1, len(all_rewards)), moving_average, color='orange', linestyle='-', label=f'Średnia krocząca (rozmiar okna={window_size})')
plt.axhline(mean_reward, color='r', linestyle='--', label=f'Średnia nagroda: {mean_reward:.2f}')
plt.xlabel('Epizod', fontsize=16)
plt.ylabel('Całkowita nagroda', fontsize=16)
plt.legend(loc='lower center')
plt.show()

In [None]:
# Actor and critic loss histories side by side, one panel each
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

loss_panels = (
  (axes[0], agent.actor.loss_history, 'Strata aktora na epizod', 'blue'),
  (axes[1], agent.critic.loss_history, 'Strata krytyka na epizod', 'red'),
)

for panel, loss_values, legend_label, line_color in loss_panels:
  panel.plot(loss_values, label=legend_label, color=line_color)
  panel.set_xlabel('Epizod', fontsize=16)
  panel.set_ylabel('Strata', fontsize=16)
  panel.legend()

plt.tight_layout()

plt.show()