In [None]:
#hide
#all_slow
# Reload edited modules automatically before each cell runs (IPython autoreload, mode 2).
%load_ext autoreload
%autoreload 2

## Google Colab preparations

In [None]:
# Detect whether the notebook is running inside Google Colab.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
    IN_COLAB = True
except ImportError:
    # Only a missing package means "not in Colab"; any other error should surface.
    IN_COLAB = False

# True when this code runs as the top-level notebook/script rather than as an imported module.
IN_MAIN = __name__ == '__main__'

In [None]:
#Infrastructure for copying notebooks
if IN_COLAB and IN_MAIN:
    home_dir = '/content/drive/MyDrive/Colab Notebooks/Ecosystems/v3'
if IN_COLAB and IN_MAIN:
    from google.colab import drive
    drive.mount('/content/drive')
    import sys
    sys.path.append(home_dir)
    %cd $home_dir
    

In [None]:
# Colab only: install the extra packages.
# Each `!` line is run as a separate shell command; the leading `cd` (no argument)
# switches that shell to the home directory before pip/apt is invoked.
if IN_COLAB and IN_MAIN:
    !cd;pip -q install import-ipynb
    !cd;pip -q install stable-baselines3[extra]
    !cd;apt install swig
    !cd;pip -q install box2d box2d-kengz
    #verbose = 0

# Preliminary imports

In [None]:
import json
import random

from IPython.display import HTML
import gym
from gym import spaces
# This has to be imported before our own notebook imports.
#import import_ipynb
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import pandas as pd
import seaborn as sns

from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep, Terrain
# import ecoenv
from ecotwins.ecoenv import EcoEnv
# from perception import Perception
from stable_baselines3 import PPO, A2C, SAC, DDPG, TD3 # , DQN
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
# from utility import distance, draw_objects, motion_diagram, normalize
from ecotwins.animation_helper import AnimationHelper
from ecotwins.reflex_agent import ReflexAgent
from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep

# Ecosystem experiments

## World 2 (One need)
* Food objects that respawn when consumed
* Automatic consumption when close
* Reward = +1 per food item consumed
* Input: food direction
* Output: steering direction.

### RL agent

In [None]:
# Build the World 2 environment: a terrain with dandelions and a SimpleSheep agent.
t = Terrain(objects={'dandelion': 1})
# t = Terrain(objects={'dandelion': (np.random.random((100,2)) - 0.5) * 20})
hyperparameters = {'max_age': 2000, 'delta': 0.2, 'close': 5, 'gamma': 0.9}
agent = SimpleSheep(distances={'dandelion': 28}, hyperparameters=hyperparameters)
eco = Ecosystem(t, agent)
env = EcoEnv(eco)

# Create the model
model = PPO('MlpPolicy', env, verbose=1)
# model.set_env(env)

# Train the model. `total_timesteps` is documented as an int, so pass an
# integer literal instead of the float 2e5.
model.learn(total_timesteps=200_000)

# Evaluate the model
# NOTE: If you use wrappers with your environment that modify rewards,
#       this will be reflected here. To evaluate with original rewards,
#       wrap environment in a "Monitor" wrapper before other wrappers.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')

# Save the model
#model.save("ecosystem")
#del model  # delete trained model to demonstrate loading

# Load the trained model (must use the same algorithm class that saved it)
#model = PPO.load("ecosystem", env=env)

# Enjoy trained model
# obs = env.reset() # Generate a new map? Returns an initial observation
# trace = [env.position.copy()]
# total_reward = 0
# # for i in range(ecoenv.TRACE_LENGTH): # Take a walk of length ecoenv.TRACE_LENGTH (not EPISODE_LENGTH as in training)
# for i in range(2000):
#     action, _states = model.predict(obs, deterministic=True) # Select action
#     obs, reward, dones, info = env.step(action) # Compute consequences
#     assert(reward >= 0)
#     total_reward += reward
#     trace.append(env.position.copy())
# trace = np.array(trace)
# env.render(trace) # Show walk
# plt.title(f'Total reward: {total_reward}');

#### Animation 

In [None]:
%%capture
# Prepare the figure and artists for animating the trained World 2 model.
# %%capture keeps the (still empty) figure out of the cell output.
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Step the World 2 agent for 2000 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=2000,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

## World 4 (Two needs)
* Food and water objects that disappear and respawn
* Automatic consumption when close
* reward = energy*water
* Input: energy direction and water direction
* Output: steering direction.
* Movement costs energy and water

In [None]:
def world4():
    """Create a fresh World 4 environment (two needs: energy and water).

    Returns:
        An ``EcoEnv`` wrapping an ``Ecosystem`` that holds a terrain with
        dandelion and water objects and a single ``MultiSheep`` agent.
    """
    # Alternative: pass explicit positions instead of the value 20, e.g.
    # {"dandelion": 20*(np.random.random((20,2))-.5), "water": 20*(np.random.random((20,2))-.5)}
    terrain = Terrain(objects={"dandelion": 20, "water": 20})

    # Nutrition table: change of each need per (action, object) event.
    nutrition = {
        ("move", None): {"energy": -0.01, "water": -0.01},
        ("consume", "water"): {"energy": 0, "water": 1},
        ("consume", "dandelion"): {"energy": 1, "water": 0.0},
    }

    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": 0.5},
        # Previously tried: distances={"dandelion": 28, "water": 28}
        distances={"dandelion": 5, "water": 5},
        interoception={"energy": 3, "water": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_multi_direction=True,
        use_single_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        # Optional, currently disabled: action_noise=0.05
        homeostatic_effects=nutrition,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### Random agent

In [None]:
from collections import defaultdict
from ecotwins.utility import normalize

class RandomAgent:
    """Baseline agent that ignores its observation and steers in a random direction.

    It offers the same ``predict`` call shape as a stable-baselines3 model, so it
    can be passed wherever such a model is expected (e.g. ``AnimationHelper``).

    Args:
        eco: Environment to act in. It must provide ``reset()``, ``step(action)``
            and ``agent.interoception``.
        n_dir: Stored as ``max_dir``; not used by the random policy itself.
    """

    def __init__(self, eco, n_dir=2):
        self.eco = eco
        self.cur_dir = 0
        self.max_dir = n_dir

        # Used to determine when to change direction. The direction is changed when
        # we have found/consumed an object.
        # Read the levels from the environment passed in, not from a notebook
        # global that may be undefined or refer to a different environment.
        self.e_levels = eco.agent.interoception.copy()

    def predict(self, observation, **kwargs):
        """Return ``(action, None)`` where ``action`` is a random 2-D direction.

        ``observation`` and any keyword arguments (such as ``deterministic``)
        are accepted for interface compatibility and ignored.
        """
        action = normalize(np.random.random((2)) - 0.5)
        return action, None  # None needed in order to mimic stable-baselines3

    def episode(self, n_step=None):
        """Run one episode with random actions.

        Args:
            n_step: Maximum number of steps. ``None`` means run until the
                environment reports ``done``.
        """
        n_steps = np.iinfo(int).max if n_step is None else n_step
        obs = self.eco.reset()

        for _step in range(n_steps):
            action, _state = self.predict(obs)
            obs, _reward, done, _info = self.eco.step(action)

            if done:
                break

#### Animation

In [None]:
%%capture
# Set up an animation of the random baseline in a fresh World 4 environment.
env = world4()
# Override the agent's `delta` hyperparameter for this demo (world4 sets it to 0.1).
env.agent.hyperparameters['delta'] = 1
model = RandomAgent(env)
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Step the random agent for 200 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### Reflex agent

#### Animation

In [None]:
%%capture
# Set up an animation of the hand-written ReflexAgent in a fresh World 4 environment.
env = world4()
model = ReflexAgent(env, n_dir=4)
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Step the reflex agent for 200 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### RL agent

In [None]:
# Set to True to continue training the `model` left over from a previous run of
# this cell instead of starting from scratch.
extend_training = False
num_cpu = 4
# Train on `num_cpu` independent World 4 environments wrapped in one vectorized env.
env = DummyVecEnv([world4 for _ in range(num_cpu)])

device = 'cpu'
if not extend_training:
    # Instantiate the agent
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)
    # model = TD3('MlpPolicy', env, verbose=1, device='cuda')
else:
    # Requires an existing `model` from an earlier run of this cell.
    model.set_env(env)

# Train the model (`total_timesteps` is documented as an int).
model.learn(total_timesteps=200_000, log_interval=400)
# model.learn(total_timesteps=200_000)

# Evaluate the model on a fresh, single World 4 environment
mean_reward, std_reward = evaluate_policy(model, world4(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

# Save the model
# model.save("multi_sheep_champion.pth")

# Load the model
# model = PPO.load("multi_sheep_champion.pth")

#### Animation

In [None]:
%%capture 
# Set up an animation of the trained World 4 model in a fresh environment.
env = world4()
a = AnimationHelper(env, model)
# show_consumed=False: do not draw markers for consumed objects.
a.init_animation(show_consumed=False);

In [None]:
# Step the trained World 4 agent for 2000 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=2000,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

In [None]:
# model.save("multi_sheep_wo_normalize-2021-12-13.pth")

In [None]:
# Alternatives that were tried earlier:
# FFwriter = animation.FFMpegWriter(fps=60)
# anim.save('animation.mp4', writer = FFwriter)
# anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)

# Write the current animation to an MP4 file (H.264 codec, yuv420p pixel format) at 30 fps.
anim.save(
    'multi_sheep_20211213-with-happiness-obs.mp4',
    fps=30,
    extra_args=['-vcodec', 'h264', '-pix_fmt', 'yuv420p'],
)

## World 6 (Three needs) (Perhaps a small network jitters less?)
* Three kinds of respawning consumable objects: dandelion, water, grass
* Automatic consumption when close
* reward = energy * water * protein
* Input: 3 x object perception
* Output: steering direction.


In [None]:
def world6():
    """Create a fresh World 6 environment (three needs: energy, water, protein).

    Returns:
        An ``EcoEnv`` wrapping an ``Ecosystem`` that holds a non-torus terrain
        with dandelion, water and grass objects and a single ``MultiSheep`` agent.
    """
    # Other settings that were tried:
    #   objects={"dandelion": 100, "water": 100, "grass": 100}
    #   objects={"dandelion": 200, "water": 200, "grass": 200}
    #   objects={"dandelion": 20*(np.random.random((20,2))-.5), "water": 20*(np.random.random((20,2))-.5), "grass": 20*(np.random.random((20,2))-.5)}
    terrain = Terrain(
        objects={"dandelion": 20, "water": 20, "grass": 20},
        torus=False,
    )

    # Nutrition table: change of each need per (action, object) event.
    nutrition = {
        ("move", None): {"energy": -0.01, "water": -0.01, "protein": -0.01},
        ("consume", "water"): {"energy": 0, "water": 1, "protein": 0},
        ("consume", "dandelion"): {"energy": 1, "water": 0, "protein": 0},
        ("consume", "grass"): {"energy": 0, "water": 0, "protein": 1},
    }

    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .5, "gamma": 0.9},
        # Previously tried: distances={"dandelion": 28, "water": 28}
        distances={"dandelion": 5, "water": 5, "grass": 5},
        interoception={"energy": 3, "water": 3, "protein": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        # Optional, currently disabled: perception_noise=0.01
        homeostatic_effects=nutrition,
        n_frames=1,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### RL Agent

In [None]:
# Set to True to continue training the `model` left over from a previous run of
# this cell; set to False to always start from scratch.
extend_training = True
env = world6()
device = 'cpu'

# Extending only works when a compatible model already exists. On a fresh
# "Restart & Run All" the leftover `model` is the agent from an earlier section
# (or no model at all), so fall back to a new model instead of failing.
model_is_reusable = False
if extend_training:
    try:
        model.set_env(env)
        model_is_reusable = True
    except (NameError, AttributeError, AssertionError, ValueError) as err:
        # NameError: no model yet; AttributeError: not an SB3 model;
        # AssertionError/ValueError: presumably SB3 rejecting a mismatching env -- TODO confirm.
        print(f'Cannot extend the existing model ({err!r}); training a new one instead.')

if not model_is_reusable:
    # Instantiate the agent
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)

# model = PPO('MlpPolicy', env, verbose=1)

# Train the agent (`total_timesteps` is documented as an int).
model.learn(total_timesteps=200_000, log_interval=400)

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')

# Save the model
# model.save("multi_sheep_with_normalization.pth")

# Load the model
# model = PPO.load("multi_sheep_with_normalization.pth")

#### Animation

In [None]:
%%capture
# Set up an animation of the trained World 6 model in a fresh environment.
env = world6()
a = AnimationHelper(env, model)
# show_consumed=False: do not draw markers for consumed objects.
a.init_animation(show_consumed=False);


In [None]:
# Step the trained World 6 agent for 2000 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=2000,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

In [None]:
# Write the World 6 animation to an MP4 file (H.264 codec, yuv420p pixel format) at 30 fps.
# The previously constructed FFMpegWriter was never passed to `save`, so it is no
# longer created. Alternative if an explicit writer is wanted:
# anim.save('3needs.mp4', writer=animation.FFMpegWriter(fps=60))
anim.save(
    '3needs.mp4',
    fps=30,
    extra_args=['-vcodec', 'h264', '-pix_fmt', 'yuv420p'],
)

### Reflex agent

In [None]:
%%capture
# Set up an animation of the hand-written ReflexAgent in a fresh World 6 environment.
env = world6()
m = ReflexAgent(env, 3)
a = AnimationHelper(env, m)
# show_consumed=False: do not draw markers for consumed objects.
a.init_animation(show_consumed=False);

#### Animation

In [None]:
# Step the World 6 reflex agent for 200 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

## Thorny world

In [None]:
def thorny_world():
    """Create a fresh thorny-world environment.

    Thorny world: food and thorn objects. The agent loses energy (blood) when
    close to a thorn. Its only need is energy; it perceives dandelions and thorns.

    Returns:
        An ``EcoEnv`` wrapping an ``Ecosystem`` with the terrain and a single
        ``MultiSheep`` agent.
    """
    # Alternative: pass explicit positions instead of the value 20, e.g.
    # {"dandelion": 20*(np.random.random((20,2))-.5), "water": 20*(np.random.random((20,2))-.5)}
    terrain = Terrain(objects={"dandelion": 20, "thorn": 20})

    # Nutrition table: change of the energy need per (action, object) event.
    nutrition = {
        ("move", None): {"energy": -0.01},
        ("consume", "thorn"): {"energy": -5},
        ("consume", "dandelion"): {"energy": 1},
    }

    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .25},
        # Previously tried: distances={"dandelion": 28, "water": 28}
        distances={"dandelion": 10, "thorn": 5},
        interoception={"energy": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_reward_radius_in_perception=True,
        use_happiness_as_obs=True,
        normalize_action=False,
        homeostatic_effects=nutrition,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### RL agent

In [None]:
env = thorny_world()

# Create the model
# model = PPO('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs')
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cuda')
# model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')

# Train the model. `eval_freq` was dropped: no `eval_env` is given, so it had no
# effect, and the argument is deprecated in SB3 1.7 and rejected by later releases.
# `total_timesteps` is documented as an int.
model.learn(total_timesteps=200_000, log_interval=400)

# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

# Save the model
# model.save("multi_sheep_champion.pth")

# Load the model
# model = PPO.load("multi_sheep_champion.pth")

#### Animation

In [None]:
%%capture
# Perhaps a somewhat unclean way of suppressing the cell output.
# Set up an animation of the trained thorny-world model in a fresh environment.
env = thorny_world()
a = AnimationHelper(env, model)
# show_consumed=False: do not draw markers for consumed objects.
a.init_animation(show_consumed=False);

In [None]:
# Step the trained thorny-world agent for 2000 frames and show the result as an inline HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=2000,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### RL agent with reward similar to World 2

In [None]:
import types
def h(agent, t=None):
    """Modified happiness function: the agent's energy level plus its ``p_happiness``.

    ``t`` is accepted so the function matches the ``happiness(terrain)`` call
    shape, but it is not used.
    """
    energy = agent.interoception['energy']
    return energy + agent.p_happiness


def _zero_p_happiness(agent):
    """Replacement for the agent's ``_init_p_happiness``: always 0.0."""
    return 0.0


# Fresh thorny world whose agent uses the modified happiness function `h`.
# Both replacements are bound to this one agent instance only.
env = thorny_world()
env.agent._init_p_happiness = types.MethodType(_zero_p_happiness, env.agent)
env.agent.happiness = types.MethodType(h, env.agent)

In [None]:
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')

# Train the model. `eval_freq` was dropped: no `eval_env` is given, so it had no
# effect, and the argument is deprecated in SB3 1.7 and rejected by later releases.
model.learn(total_timesteps=300, log_interval=400)

# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

## Discussion

### Variations
* Vary object quantity
* Vary object nutrition
* Make the objects move
* reward as happiness or happiness delta
* Vary stimulus function
* Vary perception radius and reward radius
* With or without weber 
* Nearest direction or multi-direction
✈

### Improvements
*   Death criteria (alive): Should be formulated in terms of homeostasis, not in terms of interoception or happiness = 0. Define alive as a function in the class Organism. Cf. the function in Dandelion.
* Add support for simple (asexual) reproduction. Respawning is a form of reproduction for plants and animals (when dying). Plants can be added or removed in other ways, e.g. at random times, not just when grazed. Not water pools though. 
* Distances should depend on object type (and agent). Both perception radius (elephant vs mosquito) and reward radius.  

### Possible next steps

**More sheep worlds**

* One-need sheep (energy). Food takes time to eat. Eat during 10 steps. Small reward when close. Then respawn. Use multi (partially eaten food).
* Two-need sheep (energy and water). Objects: food, water, thorns. 
* Modest sheep (energy and water).
    * Max energy and water levels (max_homeostasis). 
    * Add consciousness: energy, water (interoception) + happiness

**New animals**

More details in animal_classes.

* Moth + its world with lightbulb objects. Modify/replace world 2.
* Silverfish + its world. Same as the moth's world.
* Pill bug + its world. 
    * Energy consumption is proportional to speed^2
    * Good to increase speed when in a dry place.
* Cat + its world.
    * Energy consumption is proportional to speed^2
    * There are mice objects too. They move randomly.
    * Good to increase speed when gradient_intensity is high (near mice) 
* Social sheep + its world 
    * There are sheep objects too. They move randomly or not at all.
    * Add oxytocin when near sheep objects. Add number of sheep near (or total_intensity)
    * Burn oxytocin with time: -0.01
* Discrete sheep + its world
    * Action space: up, down, left, right. 
    * Also good for Pacman.
* Fish + its world.
    * Use a 3D observation and action spaces. 
    * Also good for flying animals
* Try to find the perfect nursery for learning to survive as fast as possible


# Animation helper class

In [None]:
#export animation_helper
class AnimationHelper:
    """Step a model through an environment and draw every step with matplotlib.

    Intended use, as in the animation cells of this notebook::

        a = AnimationHelper(env, model)
        a.init_animation()
        anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)

    Args:
        env: Environment to animate. The helper calls ``env.reset()`` and
            ``env.step(action)`` and reads ``env.position``, ``env.agent`` and
            ``env.ecosystem.terrain``.
        model: Object with ``predict(obs, deterministic=True)`` returning an
            ``(action, state)`` pair.
    """

    def __init__(self, env, model):
        self.env = env
        self.model = model

    def init_animation(self, show_consumed=True):
        """Reset the environment and create the figure and all artists.

        Args:
            show_consumed: When true, add one (initially empty) scatter plot per
                object type that ``drawframe`` fills with consumed objects.

        Returns:
            Tuple of the artists that ``drawframe`` updates.
        """
        env = self.env
        self.show_consumed = show_consumed

        # create a figure and axes
        self.fig, self.ax1 = plt.subplots()
        self.fig.set_size_inches(12, 12)

        # The plotted region is the terrain space; row 0 holds the x range and
        # row 1 the y range.
        s = env.ecosystem.terrain.space
        lower_left = [s[0, 0], s[1, 0]]
        x_side = s[0, 1] - s[0, 0]
        y_side = s[1, 1] - s[1, 0]
        x_lim, y_lim = s[0], s[1]

        self.ax1.set_xlim(x_lim)
        self.ax1.set_ylim(y_lim)

        self.obs = self.env.reset()

        # TODO: Add all circles as is done with objects below.
        # Only the first entry of `distances` is drawn as a circle around the agent.
        self.circle = matplotlib.patches.Circle(
            env.position, next(iter(env.agent.distances.values())), fc="y", alpha=0.1
        )
        self.reward_radius = env.agent.hyperparameters["close"]
        self.reward_circle = matplotlib.patches.Circle(
            env.position, self.reward_radius, fc="r", alpha=0.3
        )
        self.ax1.add_patch(self.circle)
        self.ax1.add_patch(self.reward_circle)

        # This region might not be needed if the only plotting region is the
        # terrain space.
        env_region = matplotlib.patches.Rectangle(
            lower_left, x_side, y_side, fc="b", alpha=0.05
        )
        self.ax1.add_patch(env_region)

        # One scatter plot per object type, plus optional "consumed" markers.
        self.objects_plot = {}
        for name, pts in env.ecosystem.terrain.objects.items():
            self.objects_plot[name] = self.ax1.scatter(pts[:, 0], pts[:, 1], label=name)
        self.consumed_plot = {
            name: self.ax1.scatter([], [], marker="x", label=f"Consumed {name}")
            for name in env.ecosystem.terrain.objects.keys()
        } if self.show_consumed else {}

        # Attach the legend to this helper's own axes instead of relying on
        # pyplot's notion of the "current" axes.
        self.ax1.legend()

        # create objects that will change in the animation. These are
        # initially empty, and will be given new values for each frame
        # in the animation.
        self.txt_title = self.ax1.set_title("Initial configuration")
        (self.line1,) = self.ax1.plot(
            [], [], "g", lw=2
        )  # ax.plot returns a list of 2D line objects
        (self.line2,) = self.ax1.plot([], [], "r", lw=2)
        self.trace = []

        # obs_action stores observation, action tuples.
        self.obs_action = []

        return (
            self.line1,
            *self.objects_plot.values(),
            *self.consumed_plot.values(),
            self.circle,
            self.reward_circle,
        )

    def _gen_title_string(self, n):
        """Build the title for frame ``n``: frame number, each interoception level and happiness."""
        title = [f'Frame: {n:4d}']
        for name, v in self.env.agent.interoception.items():
            title.append(f'{name}: {v:.2f}')

        terrain = self.env.ecosystem.terrain
        title.append(f'Happiness: {self.env.agent.happiness(terrain):.2f}')
        return ' '.join(title)

    def drawframe(self, n):
        """Animation callback: advance the environment one step and update the artists.

        Called sequentially by ``FuncAnimation`` with the frame number ``n``.
        When the episode ends, the trace is cleared and the environment is reset.

        Returns:
            Tuple of the updated artists.
        """
        obs = self.obs
        model = self.model
        env = self.env

        action, _states = model.predict(obs, deterministic=True)
        self.obs_action.append((obs, action))
        self.obs, reward, done, info = env.step(action)

        # Path walked so far in the current episode.
        self.trace.append(env.position.copy())
        x, y = zip(*self.trace)
        self.line1.set_data(x, y)

        for name, ax_obj in self.objects_plot.items():
            ax_obj.set_offsets(env.ecosystem.terrain.objects[name])

        if self.show_consumed:
            for name, ax_obj in self.consumed_plot.items():
                # reshape keeps an empty list of consumed objects as a (0, 2) array
                ax_obj.set_offsets(
                    np.array(env.ecosystem.terrain.consumed[name]).reshape(-1, 2)
                )

        self.circle.center = env.position
        self.reward_circle.center = env.position

        self.txt_title.set_text(self._gen_title_string(n))

        if done:
            print(f'Done @{n}, {env.agent.age}, {env.ecosystem.terrain.counter}')
            self.trace.clear()
            # Keep the observation of the new episode; otherwise the next frame
            # would feed the model the last observation of the finished episode.
            self.obs = env.reset()
        return (
            self.line1,
            *self.objects_plot.values(),
            *self.consumed_plot.values(),
            self.circle,
            self.reward_circle,
        )


# Disabled placeholder for a smoke test: the `False` guard means this block never runs.
# NOTE(review): `MothEnv` is not defined or imported anywhere in the visible notebook,
# and the model argument `4` is not a model -- fix both before enabling.
if False and __name__ == "__main__":
    env = MothEnv()
    a = AnimationHelper(env, 4)
    print("Some elementary test should be here")



# ReflexAgent

In [None]:
# Run nbdev's notebook-to-module export (cells marked with `#export`).
from nbdev.export import notebook2script

notebook2script()