In [None]:
#hide
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Google Colab preparations

In [None]:
# Detect whether the notebook is running on Google Colab by probing for the
# `google.colab` package. Only a failed import means "not on Colab"; any other
# exception (including KeyboardInterrupt) is allowed to propagate instead of
# being silently swallowed by a bare `except:`.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# True when this code runs as the top-level notebook/script rather than being
# imported as a module.
IN_MAIN = __name__ == '__main__'

In [None]:
#Infrastructure for copying notebooks
if IN_COLAB and IN_MAIN:
    home_dir = '/content/drive/MyDrive/Colab Notebooks/Ecosystems/v3'
if IN_COLAB and IN_MAIN:
    from google.colab import drive
    drive.mount('/content/drive')
    import sys
    sys.path.append(home_dir)
    %cd $home_dir
    

In [None]:
if IN_COLAB and IN_MAIN:
    !cd;pip -q install import-ipynb
    !cd;pip -q install stable-baselines3[extra]
    !cd;apt install swig
    !cd;pip -q install box2d box2d-kengz
    #verbose = 0

# Preliminary imports

In [None]:
import json
import random

from IPython.display import HTML
import gym
from gym import spaces
# This has to be imported before our own notebook imports.
#import import_ipynb
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import pandas as pd
import seaborn as sns

from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep, Terrain
# import ecoenv
from ecotwins.ecoenv import EcoEnv
# from perception import Perception
from stable_baselines3 import PPO, A2C, SAC, DDPG, TD3 # , DQN
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
# from utility import distance, draw_objects, motion_diagram, normalize
from ecotwins.animation_helper import AnimationHelper
from ecotwins.reflex_agent import ReflexAgent
from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep

# Ecosystem experiments

## World 2 (One need)
* Food objects that respawn when consumed
* Automatic consumption when close
* Reward = +1 per food item consumed
* Input: food direction
* Output: steering direction.

### RL agent

In [None]:
# Build the World 2 environment: a terrain with 'dandelion' objects and a
# SimpleSheep agent, wrapped as an EcoEnv for stable-baselines3.
t = Terrain(objects={'dandelion': 1})
# t = Terrain(objects={'dandelion': (np.random.random((100,2)) - 0.5) * 20})
hyperparameters = {'max_age': 2000, 'delta': 0.2, 'close': 5, 'gamma': 0.9}
agent = SimpleSheep(distances={'dandelion': 28}, hyperparameters=hyperparameters)
eco = Ecosystem(t, agent)
env = EcoEnv(eco)

# Create the model
model = PPO('MlpPolicy', env, verbose=1)
# model.set_env(env)

# Train the model. `total_timesteps` is documented as an int, so pass an
# integer literal instead of the float 2e4.
model.learn(total_timesteps=20_000)

# Evaluate the model
# NOTE: If you use wrappers with your environment that modify rewards,
#       this will be reflected here. To evaluate with original rewards,
#       wrap environment in a "Monitor" wrapper before other wrappers.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')

# Save the model
#model.save("ecosystem")
#del model  # delete trained model to demonstrate loading

# Load the trained model (use the class that was trained above, i.e. PPO)
#model = PPO.load("ecosystem", env=env)

# Enjoy trained model
# obs = env.reset() # Generate a new map? Returns an initial observation
# trace = [env.position.copy()]
# total_reward = 0
# # for i in range(ecoenv.TRACE_LENGTH): # Take a walk of length ecoenv.TRACE_LENGTH (not EPISODE_LENGTH as in training)
# for i in range(2000):
#     action, _states = model.predict(obs, deterministic=True) # Select action
#     obs, reward, dones, info = env.step(action) # Compute consequences
#     assert(reward >= 0)
#     total_reward += reward
#     trace.append(env.position.copy())
# trace = np.array(trace)
# env.render(trace) # Show walk
# plt.title(f'Total reward: {total_reward}');

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:2.00 happiness: 0.00
Reset@2001, accumulated reward: 0.00, Interoception levels: energy:3.00, water:2.00 happiness: 0.00
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2e+03    |
|    ep_rew_mean     | 0        |
| time/              |          |
|    fps             | 577      |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 2048     |
---------------------------------
Reset@2001, accumulated reward: 7.00, Interoception levels: energy:3.00, water:2.00 happiness: 7.00
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2e+03       |
|    ep_rew_mean          | 3.5         |
| time/                   |             |
|    fps                  | 435         |
|    iterations 

#### Animation 

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display.
# Set up an animation of `model` acting in `env`; the helper's `fig` and
# `drawframe` are consumed by the FuncAnimation call in the next cell.
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

## World 4 (Two needs)
* Food and water objects that disappear and respawn
* Automatic consumption when close
* reward = energy*water
* Input: energy direction and water direction
* Output: steering direction.
* Movement costs energy and water

In [None]:
def world4():
    """Build the World 4 environment: one MultiSheep with two needs.

    The agent tracks 'energy' and 'water', both starting at 3. According to
    the nutrition table every move costs 0.01 of each, consuming a dandelion
    adds 1 energy and consuming water adds 1 water.

    Returns:
        A new ``EcoEnv`` wrapping a freshly created ``Ecosystem``.
    """
    # Nutrition table: effect of each (action, object) pair on the needs.
    nutrition_table = {
        ("move", None): {"energy": -0.01, "water": -0.01},
        ("consume", "water"): {"energy": 0, "water": 1},
        ("consume", "dandelion"): {"energy": 1, "water": 0.0},
    }

    # The terrain is created before the agent, as in the original cell.
    # Alternative tried earlier: explicit coordinates instead of the value 20,
    # e.g. {"dandelion": 20*(np.random.random((20,2))-.5), "water": ...}.
    terrain = Terrain(objects={"dandelion": 20, "water": 20})

    # An earlier setting for `distances` was 28 for both object types.
    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": 0.5},
        distances={"dandelion": 5, "water": 5},
        interoception={"energy": 3, "water": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_multi_direction=True,
        use_single_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        action_noise=0.05,
        homeostatic_effects=nutrition_table,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### Random agent

In [None]:
from collections import defaultdict
from ecotwins.utility import normalize

class RandomAgent:
    """Baseline agent that picks a random steering direction at every step.

    It offers a ``predict`` method with the same call shape as a
    stable-baselines3 model, so it can be used where this notebook passes a
    trained model (e.g. to ``AnimationHelper``).
    """

    def __init__(self, eco, n_dir=2):
        """Store the environment and take a snapshot of the agent's levels.

        Args:
            eco: Environment to act in. It must provide ``reset()``,
                ``step(action)`` and ``agent.interoception`` (this notebook
                passes an ``EcoEnv``).
            n_dir: Stored as ``max_dir``; not read by the random policy.
        """
        self.eco = eco
        self.cur_dir = 0
        self.max_dir = n_dir

        # Used to determine when to change direction. The direction is changed
        # when we have found/consumed an object. (Not read anywhere in this
        # class.) Taken from the environment passed in, not from a global.
        self.e_levels = eco.agent.interoception.copy()

    def predict(self, observation, **kwargs):
        """Return a random action, ignoring ``observation``.

        Returns:
            ``(action, None)``, where ``action`` is ``normalize`` applied to a
            2-vector drawn uniformly from [-0.5, 0.5). The ``None`` mimics the
            state slot of the stable-baselines3 ``predict`` result.
        """
        action = normalize(np.random.random((2)) - 0.5)
        return action, None  # None needed in order to mimic stable-baselines3

    def episode(self, n_step=None):
        """Run one episode with random actions.

        Args:
            n_step: Maximum number of steps. ``None`` means the loop ends only
                when the environment reports ``done``.
        """
        n_steps = np.iinfo(int).max if n_step is None else n_step
        obs = self.eco.reset()

        for _ in range(n_steps):
            # Reuse predict() so the policy is defined in exactly one place.
            action, _state = self.predict(obs)
            obs, reward, done, _info = self.eco.step(action)

            if done:
                break

#### Animation

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display.
# Fresh World 4 environment driven by the random baseline agent.
env = world4()
# Override the 'delta' hyperparameter that world4() sets to 0.1.
env.agent.hyperparameters['delta'] = 1
# NOTE: rebinds the notebook-global `model`, replacing any model trained earlier.
model = RandomAgent(env)
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### Reflex agent

#### Animation

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display.
# Fresh World 4 environment driven by the hand-written ReflexAgent.
env = world4()
# NOTE: rebinds the notebook-global `model`.
model = ReflexAgent(env, n_dir=4)
a = AnimationHelper(env, model)
a.init_animation();

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### RL agent

In [None]:
# Set to True to continue training the `model` left over from a previous run
# of this cell instead of creating a new one.
extend_training = False

# Number of environment copies. DummyVecEnv steps them one after another in
# the current process, so this is not a number of worker processes.
num_cpu = 4
env = DummyVecEnv([world4 for _ in range(num_cpu)])

device = 'cpu'
if not extend_training:
    # Instantiate the agent
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)
    # model = TD3('MlpPolicy', env, verbose=1, device='cuda')
else:
    # Requires `model` to already hold a stable-baselines3 model.
    model.set_env(env)

# Train the model. `total_timesteps` is documented as an int, so pass an
# integer literal instead of the float 2e4.
model.learn(total_timesteps=20_000, log_interval=400)
# model.learn(total_timesteps=20_000)

# Evaluate the model on a fresh, non-vectorised World 4 environment.
mean_reward, std_reward = evaluate_policy(model, world4(), n_eval_episodes=50)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

# Save the model
# model.save("multi_sheep_champion.pth")

# Load the model (use the class that was trained above, i.e. A2C)
# model = A2C.load("multi_sheep_champion.pth")



Using cpu device
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:2.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:1.00, water:0.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:2.00 happiness: 0.00
Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:0.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:1.00, water:0.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happi



Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00
Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:1.00 happiness: 0.00
Reset@1000, accumulated reward: -7.50, Interoception levels: energy:0.00, water:1.00 happiness: 0.00
Reset@700, accumulated reward: -8.85, Interoception levels: energy:6.00, water:0.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happiness: 0.00
Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:2.00 happiness: 0.00
Reset@400, accumulated reward: -10.20, Interoception levels: energy:1.00, water:0.00 happiness: 0.00
Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:0.00 happiness: 0.00
Reset@700, accumulated reward: -8.85, Interoception levels: energy:8.00, water:0.00 happiness: 0.00
Reset@600, accumulated reward: -9.30, Interoception levels: energy:0.00, water:1.00 happiness: 0.00

#### Animation

In [None]:
%%capture 
# %%capture swallows the output this cell would otherwise display.
# Animate the current `model` in a fresh World 4 environment.
env = world4()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

In [None]:
# Save whatever the notebook-global `model` currently holds. That depends on
# which cells were run last, so run this right after the training cell.
model.save("multi_sheep_wo_normalize-2021-12-13.pth")

In [None]:
# Write the current `anim` to disk as an H.264 / yuv420p MP4 at 30 fps.
# (An earlier variant passed an explicit `animation.FFMpegWriter(fps=60)` as
# `writer=` and saved to 'animation.mp4'.)
video_path = 'multi_sheep_20211213-with-happiness-obs.mp4'
ffmpeg_args = ['-vcodec', 'h264', '-pix_fmt', 'yuv420p']
anim.save(video_path, fps=30, extra_args=ffmpeg_args)

## World 6 (Three needs) – maybe a small network shakes less?
* Three kinds of respawning consumable objects: dandelion, water, grass
* Automatic consumption when close
* reward = energy * water * protein
* Input: 3 x object perception
* Output: steering direction.


In [None]:
def world6():
    """Build the World 6 environment: one MultiSheep with three needs.

    The agent tracks 'energy', 'water' and 'protein', all starting at 3.
    According to the nutrition table every move costs 0.01 of each, while
    dandelion, water and grass add 1 to energy, water and protein respectively.

    Returns:
        A new ``EcoEnv`` wrapping a freshly created ``Ecosystem``.
    """
    # Nutrition table: effect of each (action, object) pair on the needs.
    nutrition_table = {
        ("move", None): {"energy": -0.01, "water": -0.01, "protein": -0.01},
        ("consume", "water"): {"energy": 0, "water": 1, "protein": 0},
        ("consume", "dandelion"): {"energy": 1, "water": 0, "protein": 0},
        ("consume", "grass"): {"energy": 0, "water": 0, "protein": 1},
    }

    # The terrain is created before the agent, as in the original cell.
    # Alternatives tried earlier: 100 or 200 of each object, or explicit
    # coordinates such as 20*(np.random.random((20,2))-.5) per object type.
    terrain = Terrain(
        objects={"dandelion": 20, "water": 20, "grass": 20},
        torus=False,
    )

    # An earlier setting for `distances` was 28; `perception_noise` (0.01) is
    # currently disabled.
    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .5, "gamma": 0.9},
        distances={"dandelion": 5, "water": 5, "grass": 5},
        interoception={"energy": 3, "water": 3, "protein": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        homeostatic_effects=nutrition_table,
        n_frames=1,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### RL Agent

In [None]:
# Set to True to continue training the `model` left over from a previous run
# of this cell instead of creating a new one.
extend_training = False
env = world6()

if not extend_training:
    # Instantiate the agent
    device = 'cpu'
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)
else:
    # Requires `model` to already hold a stable-baselines3 model.
    model.set_env(env)

# model = PPO('MlpPolicy', env, verbose=1)

# Train the agent. `total_timesteps` is documented as an int, so pass an
# integer literal instead of the float 2e4.
model.learn(total_timesteps=20_000, log_interval=400)

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')

# Save the model
# model.save("multi_sheep_with_normalization.pth")

# Load the model (use the class that was trained above, i.e. A2C)
# model = A2C.load("multi_sheep_with_normalization.pth")



Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00, protein:3.00 happiness: 27.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:0.00, water:2.00, protein:2.00 happiness: 0.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:0.00, water:0.00, protein:0.00 happiness: 0.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:1.00, water:1.00, protein:0.00 happiness: 0.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:0.00, water:1.00, protein:0.00 happiness: 0.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:1.00, water:1.00, protein:0.00 happiness: 0.00
Reset@300, accumulated reward: -25.95, Interoception levels: energy:1.00, water:0.00, protein:0.00 happiness: 0.00
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 300 

#### Animation

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display.
# Animate the current `model` in a fresh World 6 environment.
env = world6()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);


In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

In [None]:
# Write the current `anim` to '3needs.mp4' as an H.264 / yuv420p MP4 at
# 30 fps. The FFMpegWriter(fps=60) instance that used to be created here was
# never passed to save(), so it has been dropped.
anim.save('3needs.mp4',
          fps=30,
          extra_args=['-vcodec', 'h264',  '-pix_fmt', 'yuv420p']
          )

Done @96, 300, 0
Reset@300, accumulated reward: -25.95, Interoception levels: energy:0.00, water:2.00, protein:1.00 happiness: 0.00


### Reflex agent

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display.
# Fresh World 6 environment driven by the hand-written ReflexAgent; the second
# argument is passed positionally (an earlier cell passes it as `n_dir`).
env = world6()
m = ReflexAgent(env, 3)
a = AnimationHelper(env, m)
a.init_animation(show_consumed=False);

#### Animation

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

## Thorny world

In [None]:
def thorny_world():
    """Build the thorny world: food and thorn objects, a single energy need.

    The agent loses energy (blood) when close to a thorn. According to the
    nutrition table every move costs 0.01 energy, a dandelion adds 1 energy
    and a thorn removes 5.

    Returns:
        A new ``EcoEnv`` wrapping a freshly created ``Ecosystem``.
    """
    # Nutrition table: effect of each (action, object) pair on the needs.
    nutrition_table = {
        ("move", None): {"energy": -0.01},
        ("consume", "thorn"): {"energy": -5},
        ("consume", "dandelion"): {"energy": 1},
    }

    # The terrain is created before the agent, as in the original cell.
    terrain = Terrain(objects={"dandelion": 20, "thorn": 20})

    # Need: only energy. Object perception: dandelion and thorn.
    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .25},
        distances={"dandelion": 10, "thorn": 5},
        interoception={"energy": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_reward_radius_in_perception=True,
        use_happiness_as_obs=True,
        homeostatic_effects=nutrition_table,
    )

    return EcoEnv(Ecosystem(terrain, sheep))

### RL agent

In [None]:
env = thorny_world()

# Create the model
# model = PPO('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs')
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cuda')
# model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')

# Train the model. `eval_freq` was dropped: no `eval_env` was passed, so it had
# no effect, and later stable-baselines3 releases no longer accept it.
# `total_timesteps` is documented as an int, hence 20_000 rather than 2e4.
model.learn(total_timesteps=20_000, log_interval=400)

# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=50)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

# Save the model
# model.save("multi_sheep_champion.pth")

# Load the model (use the class that was trained above, i.e. A2C)
# model = A2C.load("multi_sheep_champion.pth")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00 happiness: 3.00
Logging to ./tb_logs/A2C_3




Reset@47, accumulated reward: -8.40, Interoception levels: energy:-2.47 happiness: -2.47
Reset@270, accumulated reward: -10.29, Interoception levels: energy:-4.70 happiness: -4.70
Reset@75, accumulated reward: -8.64, Interoception levels: energy:-2.75 happiness: -2.75
Reset@462, accumulated reward: -9.93, Interoception levels: energy:-4.62 happiness: -4.62
Reset@300, accumulated reward: -5.55, Interoception levels: energy:0.00 happiness: 0.00
Reset@300, accumulated reward: -5.55, Interoception levels: energy:0.00 happiness: 0.00
Reset@300, accumulated reward: -5.55, Interoception levels: energy:0.00 happiness: 0.00
Reset@10, accumulated reward: -8.08, Interoception levels: energy:-2.10 happiness: -2.10
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 220      |
|    ep_rew_mean        | -7.75    |
| time/                 |          |
|    fps                | 194      |
|    iterations         | 400      |
|    time_elapsed       | 10 

#### Animation

In [None]:
%%capture
# %%capture swallows the output this cell would otherwise display
# (perhaps a somewhat unclean way of suppressing the cell output).
# Animate the current `model` in a fresh thorny world.
env = thorny_world()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);

In [None]:
# Render 200 frames with a 50 ms frame interval and embed the result in the
# notebook as an HTML5 video.
anim = animation.FuncAnimation(
    a.fig,
    a.drawframe,
    frames=200,
    interval=50,
    blit=True,
)
HTML(anim.to_html5_video())

### RL agent with reward similar to World 2

In [None]:
import types
# The modified happiness function
def h(agent, t=None):
    """Return the agent's current energy plus its ``p_happiness`` value.

    Args:
        agent: Object exposing ``interoception['energy']`` and ``p_happiness``.
        t: Accepted but ignored.
    """
    energy_level = agent.interoception['energy']
    return energy_level + agent.p_happiness


# Fresh thorny world whose agent uses the modified reward signal. Both
# replacements are bound to this one agent instance only:
#   * `_init_p_happiness` now always returns 0.0
#   * `happiness` is computed by `h`
env = thorny_world()
sheep = env.agent
sheep._init_p_happiness = types.MethodType(lambda self: 0.0, sheep)
sheep.happiness = types.MethodType(h, sheep)



In [None]:
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')

# Train the model. `eval_freq` was dropped: no `eval_env` was passed, so it had
# no effect, and later stable-baselines3 releases no longer accept it.
model.learn(total_timesteps=300, log_interval=400)

# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=50)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00 happiness: 6.00
Logging to ./tb_logs/A2C_4
Reset@300, accumulated reward: 445.95, Interoception levels: energy:0.00 happiness: 451.50
Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00 happiness: 3.00
Reset@300, accumulated reward: 445.95, Interoception levels: energy:0.00 happiness: 451.50
Reset@300, accumulated reward: 445.95, Interoception levels: energy:0.00 happiness: 451.50
Reset@400, accumulated reward: 640.60, Interoception levels: energy:0.00 happiness: 646.00
Reset@400, accumulated reward: 760.60, Interoception levels: energy:0.00 happiness: 766.00
Reset@5, accumulated reward: 6.86, Interoception levels: energy:-2.05 happiness: 12.85
Reset@400, accumulated reward: 708.60, Interoception levels: energy:0.00 happiness: 714.00
Reset@300, accumulated reward: 445.95, Interoception levels: energy:0.00 hap

## Discussion

### Variations
* Vary object quantity
* Vary object nutrition
* Make the objects move
* reward as happiness or happiness delta
* Vary stimulus function
* Vary perception radius and reward radius
* With or without weber 
* Nearest direction or multi-direction
✈

### Improvements
*   Death criteria (alive): Should be formulated in terms of homeostasis, not in terms of interoception or happiness = 0. Define alive as a function in the class Organism. Cf. the function in Dandelion.
* Add support for simple (asexual) reproduction. Respawning is a form of reproduction for plants and animals (when dying). Plants can be added or removed in other ways, e.g. at random times, not just when grazed. Not water pools though. 
* Distances should depend on object type (and agent). Both perception radius (elephant vs mosquito) and reward radius.  

### Possible next steps

**More sheep worlds**

* One-need sheep (energy). Food takes time to eat. Eat during 10 steps. Small reward when close. Then respawn. Use multi (partially eaten food).
* Two-need sheep (energy and water). Objects: food, water, thorns. 
* Modest sheep (energy and water).
    * Max energy and water levels (max_homeostasis). 
    * Add consciousness: energy, water (interoception) + happiness

**New animals**

More details in animal_classes.

* Moth + its world with lightbulb objects. Modify/replace world 2.
* Silverfish + its world. Same as the moth's world.
* Pill bug + its world. 
    * Energy consumption is proportional to speed^2
    * Good to increase speed when in a dry place.
* Cat + its world.
    * Energy consumption is proportional to speed^2
    * There are mice objects too. They move randomly.
    * Good to increase speed when gradient_intensity is high (near mice) 
* Social sheep + its world 
    * There are sheep objects too. They move randomly or not at all.
    * Add oxytocin when near sheep objects. Add number of sheep near (or total_intensity)
    * Burn oxytocin with time: -0.01
* Discrete sheep + its world
    * Action space: up, down, left, right. 
    * Also good for Pacman.
* Fish + its world.
    * Use a 3D observation and action spaces. 
    * Also good for flying animals
* Try to find the perfect nursery for learning to survive as fast as possible


In [None]:
# Export the notebooks of this project to Python modules with nbdev.
from nbdev.export import notebook2script

notebook2script()

Converted 00_core.ipynb.
Converted animal_classes.ipynb.
Converted animation_helper.ipynb.
Converted ecoenv.ipynb.
Converted happiness.ipynb.
Converted index.ipynb.
Converted perception.ipynb.
Converted reflex_agent.ipynb.
Converted tyckande.ipynb.
Converted utility.ipynb.
Converted worlds.ipynb.
