In [None]:
!pip install imageio > /dev/null 2>&1

In [None]:
from google.colab import drive
# Google Drive locations used by this notebook:
#   PATH_TO_ENVIRONMENT    - directory holding the environment sources copied below
#   PATH_TO_ALGORITHMS     - directory holding colab_helpers.py and A2C.py
#   PATH_TO_SAVE_MODEL     - where the prey agent is saved after the training loop
#   PATH_TO_PREDATOR_MODEL - checkpoint loaded into the predator agent
#   PATH_TO_PREY_MODEL     - checkpoint loaded into the prey agent
PATH_TO_ENVIRONMENT = '/content/drive/MyDrive/Master_Thesis/Final/Predator-Prey_Environment'
PATH_TO_ALGORITHMS = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm'
PATH_TO_SAVE_MODEL = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm/Prey_Final.pth'
PATH_TO_PREDATOR_MODEL = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm/Predator.pth'
PATH_TO_PREY_MODEL = '/content/drive/MyDrive/Master_Thesis/Final/Algorithm/Prey.pth'

# Mount Google Drive at /content/drive so the paths above resolve.
drive.mount('/content/drive')

# Copy the algorithm modules into /content; the import cell loads `A2C` and
# `colab_helpers` by bare module name.
# NOTE(review): presumably /content is the notebook's working directory and
# therefore on the import path - confirm.
!cp {PATH_TO_ALGORITHMS}/colab_helpers.py /content
!cp {PATH_TO_ALGORITHMS}/A2C.py /content

# Copy the environment module together with the packages that sit next to it.
# NOTE(review): presumably environment.py imports from utils, render_utils,
# entities and colliders - confirm.
!cp -r {PATH_TO_ENVIRONMENT}/utils /content
!cp -r {PATH_TO_ENVIRONMENT}/render_utils /content
!cp -r {PATH_TO_ENVIRONMENT}/entities /content
!cp -r {PATH_TO_ENVIRONMENT}/colliders /content
!cp {PATH_TO_ENVIRONMENT}/environment.py /content

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from A2C import AgentA2C
from colab_helpers import save_video, show_video
from environment import Environment

In [None]:
# Directory in which generated videos are saved
VIDEO_FILE_PATH = '.'

# Mode in which the environment is rendered
RENDER_MODE = 'rgb_array'

# Number of episodes run by each of the evaluation and training loops below
TOTAL_EPISODES = 250

# Upper bound on the number of time steps simulated in a single episode
TIME_STEPS = 2000

# Predator agent restored from its checkpoint.
# NOTE(review): the positional arguments are presumably (number of actions,
# observation size) - confirm against A2C.py.
# The rollout loops visible in this notebook sample predator actions at random
# and do not query this agent.
predator_agent = AgentA2C(4, 17, units=256, dropout=0.25)
predator_agent.load(PATH_TO_PREDATOR_MODEL)

# Prey agent restored from its checkpoint; this is the agent that the loops
# below evaluate, train further and finally save.
prey_agent = AgentA2C(4, 23, units=256, dropout=0.25)
prey_agent.load(PATH_TO_PREY_MODEL)

In [None]:
# Scenario for this notebook: a single prey and five predators among 200 plants,
# with reproduction switched off for both species and predator hunger/thirst
# decay disabled.
env_settings = dict(
  render_mode=RENDER_MODE,
  plant_growth_time_steps=1000,
  plants_count=200,
  preys_initial_count=1,
  prey_reproduction=False,
  predators_initial_count=5,
  predator_reproduction=False,
  predator_hunger_decay=False,
  predator_thirst_decay=False,
)

env = Environment(**env_settings)

In [None]:
# Sizes shared with the cells below: predators draw their random actions from
# range(action_space); observation_space equals the second argument given to
# the prey AgentA2C.
action_space = 4
observation_space = 23

# Quick smoke test: start an episode and print what each prey observes.
env.reset()

for prey in env.get_preys():
  initial_observation = env.obs_prey(prey)
  print(initial_observation)

# Advance the environment by one step to check that stepping works.
env.step()

In [None]:
# Record one demonstration episode: the prey follows the loaded policy, the
# predators move at random, and every rendered frame is kept for the video.
frames = []
state = env.reset()
total_reward = 0

# First observation for the policy (the environment is created with one prey).
for prey in env.get_preys():
  obs = env.obs_prey(prey)

for time_step in range(TIME_STEPS):
  done = False

  for prey in env.get_preys():
    next_obs, reward, done = env.step_prey(prey, prey_agent.act(obs))
    obs = next_obs

    # A prey that is no longer alive ends the episode regardless of the flag
    # returned by step_prey.
    if not prey.is_alive():
      done = True

  for predator in env.get_predators():
    random_action = np.random.randint(0, action_space)
    env.step_predator(predator, random_action)

  env.step()

  # Capture the frame for this step before deciding whether to stop.
  frames.append(env.render())

  if done:
    break

# save_video receives the path without extension, show_video the same path with
# ".mp4". NOTE(review): presumably save_video appends the extension - confirm.
video_stem = f'{VIDEO_FILE_PATH}/Test of environment'
save_video(frames, video_stem)
show_video(f'{video_stem}.mp4')

In [None]:
# Evaluate the loaded prey policy (no learning) against randomly acting
# predators and record the total reward of every episode.
all_rewards = []

for episode in range(TOTAL_EPISODES):
  env.reset()
  total_reward = 0

  # First observation for the policy (the environment is created with one prey).
  for prey in env.get_preys():
    obs = env.obs_prey(prey)

  for time_step in range(TIME_STEPS):
    done = False

    for prey in env.get_preys():
      action = prey_agent.act(obs)
      next_obs, reward, done = env.step_prey(prey, action)
      # Accumulate the reward where it is produced. Reading `reward` after the
      # loop would re-add the previous step's value (or raise NameError on the
      # very first step) whenever no prey is left to act.
      total_reward += reward
      obs = next_obs

    # Predators are driven by uniformly random actions.
    for predator in env.get_predators():
      env.step_predator(predator, np.random.randint(0, action_space))

    env.step()

    # Report once per episode, either on termination or when the step budget
    # is exhausted; "\r" keeps the progress on a single output line.
    if done or time_step == TIME_STEPS - 1:
      print(f"\rEpisode: {episode}, Reward: {total_reward}, Time steps: {time_step}", end="")
      break

  all_rewards.append(total_reward)

# Mean episode reward; the plotting cell draws it as a horizontal line.
average_reward = sum(all_rewards) / TOTAL_EPISODES

In [None]:
# Smooth the per-episode rewards with a box filter of `window_size` episodes.
# mode='valid' yields one value per full window, so the first smoothed point
# belongs to episode index window_size - 1.
window_size = 10
box_kernel = np.full(window_size, 1 / window_size)
moving_average = np.convolve(all_rewards, box_kernel, mode='valid')
smoothed_episodes = range(window_size - 1, len(all_rewards))

# Labels are in Polish: reward per episode, moving average, mean reward.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(all_rewards, label='Nagroda na epizod')
ax.plot(smoothed_episodes, moving_average, color='orange', linestyle='-', label=f'Średnia krocząca (rozmiar okna={window_size})')
ax.axhline(average_reward, color='r', linestyle='--', label=f'Średnia nagroda: {average_reward:.2f}')
ax.set_xlabel('Epizod', fontsize=16)
ax.set_ylabel('Całkowita nagroda', fontsize=16)
ax.legend(loc='upper right')
plt.show()

In [None]:
# Continue training the prey agent against randomly acting predators. The
# agent learns once per episode and is saved after the last episode.
all_rewards = []

for episode in range(TOTAL_EPISODES):
  env.reset()
  total_reward = 0

  # First observation for the policy (the environment is created with one prey).
  for prey in env.get_preys():
    obs = env.obs_prey(prey)

  for time_step in range(TIME_STEPS):
    # Reset per step, as the other rollout loops do, so a step in which no prey
    # acts cannot reuse a flag left over from an earlier step or episode.
    done = False

    for prey in env.get_preys():
      action = prey_agent.act(obs)
      next_obs, reward, done = env.step_prey(prey, action)

      # Store the transition for the end-of-episode update.
      prey_agent.remember(obs, action, reward, done)
      # Accumulate the reward where it is produced. Reading `reward` after the
      # loop would re-add the previous step's value (or raise NameError on the
      # very first step) whenever no prey is left to act.
      total_reward += reward
      obs = next_obs

    # Predators are driven by uniformly random actions.
    for predator in env.get_predators():
      env.step_predator(predator, np.random.randint(0, action_space))

    env.step()

    # The episode ends on termination or when the step budget is exhausted.
    if done or time_step == TIME_STEPS - 1:
      print(f"\rEpisode: {episode}, Reward: {total_reward}, Time steps: {time_step}", end="")
      # Hand the final observation to the agent before the update.
      # NOTE(review): presumably used to bootstrap the value of the last
      # state - confirm in A2C.py.
      prey_agent.get_last_state(next_obs)
      prey_agent.learn()
      break

  all_rewards.append(total_reward)

# Persist the further-trained prey agent and report the mean episode reward.
prey_agent.save(PATH_TO_SAVE_MODEL)
average_reward = sum(all_rewards) / TOTAL_EPISODES
print()
print("Average reward:", average_reward)

In [None]:
# Training-reward curve: raw per-episode totals, their moving average over
# `window_size` episodes and the overall mean as a dashed reference line.
window_size = 10
averaging_weights = np.full(window_size, 1 / window_size)
moving_average = np.convolve(all_rewards, averaging_weights, mode='valid')

# With mode='valid' the first averaged value corresponds to episode index
# window_size - 1, hence the shifted x positions.
averaged_x = range(window_size - 1, len(all_rewards))

# Labels are in Polish: reward per episode, moving average, mean reward.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(all_rewards, label='Nagroda na epizod')
ax.plot(averaged_x, moving_average, color='orange', linestyle='-', label=f'Średnia krocząca (rozmiar okna={window_size})')
ax.axhline(average_reward, color='r', linestyle='--', label=f'Średnia nagroda: {average_reward:.2f}')
ax.set_xlabel('Epizod', fontsize=16)
ax.set_ylabel('Całkowita nagroda', fontsize=16)
ax.legend(loc='lower center')
plt.show()

In [None]:
# Side-by-side loss curves recorded by the prey agent's actor (left, blue) and
# critic (right, red). Labels are in Polish: actor/critic loss per episode.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

loss_panels = (
  (axes[0], prey_agent.actor.loss_history, 'Strata aktora na epizod', 'blue'),
  (axes[1], prey_agent.critic.loss_history, 'Strata krytyka na epizod', 'red'),
)

# Both panels share the same axis labelling; only data, label and colour differ.
for panel, loss_curve, legend_label, line_color in loss_panels:
  panel.plot(loss_curve, label=legend_label, color=line_color)
  panel.set_xlabel('Epizod', fontsize=16)
  panel.set_ylabel('Strata', fontsize=16)
  panel.legend()

plt.tight_layout()

plt.show()