## Initialisations

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path
from tqdm import trange
import pickle

from visual_taxis import NMFVisualTaxis, MovingObjArena
from flygym.envs.nmf_mujoco import MuJoCoParameters

from util import linear_schedule, SaveIntermediateModelsCallback
from tqdm import trange
from flygym.util.config import all_leg_dofs

from stable_baselines3 import PPO

  if not hasattr(tensorboard, "__version__") or LooseVersion(
  ) < LooseVersion("1.15"):


In [2]:
# Step length handed to the environment as `decision_dt`
# (presumably in seconds, the plots below label the axis "Time [s]" -- TODO confirm).
decision_dt = 0.05

# Build the arena with its moving object, reset it, then read the initial
# ball position it reports.
arena = MovingObjArena(obj_spawn_pos=(5, 3, 0), move_mode="s_shape")
arena.reset(new_spawn_pos=True, new_move_mode=False)
spawn = arena.init_ball_pos

# Rendering options and the visual-taxis simulation wrapped around the arena.
sim_params = MuJoCoParameters(
    render_playspeed=0.2,
    render_camera="Animat/camera_top_zoomout",
    render_raw_vision=True,
)
sim = NMFVisualTaxis(
    sim_params=sim_params,
    arena=arena,
    decision_dt=decision_dt,
    n_stabilisation_steps=5000,
    obj_threshold=50,
)

# Log directory holding the saved model; evaluation artefacts are written to
# an "eval_<model_name>" sub-directory, created here if it does not exist.
dr = "../../logs_orient_"
model_name = "saved_model_MLPlinearlr_continue"
out_dir = Path(f"{dr}/eval_{model_name}")
out_dir.mkdir(parents=True, exist_ok=True)

# Number of decision steps executed per evaluation run: 2 / decision_dt
# (40 with the value above).
n_base = int(2 / decision_dt)

In [3]:
# Restore the saved PPO model from the log directory and print its policy
# so the network layout can be inspected.
nmf_model = PPO.load(f"{dr}/{model_name}")

print(nmf_model.policy)

ActorCriticPolicy(
  (features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (pi_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (vf_features_extractor): FlattenExtractor(
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (mlp_extractor): MlpExtractor(
    (policy_net): Sequential(
      (0): Linear(in_features=6, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
    (value_net): Sequential(
      (0): Linear(in_features=6, out_features=64, bias=True)
      (1): Tanh()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): Tanh()
    )
  )
  (action_net): Linear(in_features=64, out_features=2, bias=True)
  (value_net): Linear(in_features=64, out_features=1, bias=True)
)


## Simulation runs

In [4]:
def _save_line_plot(values, path):
    """Plot ``values`` against their step index and save the figure to ``path``.

    A dedicated figure is created and closed for every call so that figures
    do not pile up over the evaluation runs.
    """
    fig, ax = plt.subplots()
    ax.plot(np.arange(len(values)), values)
    fig.savefig(path)
    plt.close(fig)


# Roll out the loaded policy for `folds` runs of `n_base` decision steps each.
# Every run saves its reward/action curves and a video to `out_dir`, and its
# per-step rewards are stored according to the arena's move mode.
distances = []  # declared for completeness; nothing is appended to it in this cell
rew_straight = []
rew_s = []
folds = 20

try:
    for fold in range(folds):
        print(fold)
        obs, _info = sim.reset()
        # Ball position after the reset; only used to tag the output file names.
        spawn = sim.arena.init_ball_pos

        obs_hist = []
        visual_hist = []
        action_history = []
        rewards_hist = []

        for _step in trange(n_base):
            action, _state = nmf_model.predict(obs, deterministic=True)
            # NOTE(review): `term` and `trunc` are not acted upon, so every run
            # always executes exactly n_base steps (the stacking into arrays
            # done later relies on equal-length runs).
            obs, rew, term, trunc, info = sim.step(np.array(action))
            sim.render()
            obs_hist.append(obs)
            visual_hist.append(sim.curr_raw_visual_input)
            action_history.append(action)
            rewards_hist.append(rew)

        obs_hist = np.array(obs_hist)

        # Last action and last instantaneous reward of the run.
        print(action)
        print(rewards_hist[-1])

        _save_line_plot(np.cumsum(rewards_hist), out_dir / f"{fold}-{spawn}totrewards.png")
        _save_line_plot(rewards_hist, out_dir / f"{fold}-{spawn}rewards.png")
        _save_line_plot(action_history, out_dir / f"{fold}-{spawn}actions.png")
        sim.save_video(out_dir / f'{fold}-{spawn}resultcust.mp4')

        # Save relevant metrics
        if sim.arena.move_mode == "straightHeading":
            rew_straight.append(rewards_hist)
        elif sim.arena.move_mode == "s_shape":
            rew_s.append(rewards_hist)
        else:
            # Make dropped runs visible instead of discarding them silently.
            print(
                f"Run {fold}: move mode {sim.arena.move_mode!r} is not tracked; "
                "rewards not stored"
            )
finally:
    # Release the simulation even if one of the runs raised.
    sim.close()

0


100%|██████████| 40/40 [02:25<00:00,  3.64s/it]


[-0.5387659  -0.16200714]
-0.5093550536856306
1


100%|██████████| 40/40 [02:26<00:00,  3.67s/it]


[-0.723995   0.3109061]
-0.47550122451224364
2


100%|██████████| 40/40 [02:33<00:00,  3.83s/it]


[-0.29389524 -0.5702161 ]
0.13335577286753164
3


100%|██████████| 40/40 [02:35<00:00,  3.89s/it]


[ 0.26205567 -1.        ]
1.0770524775796675
4


100%|██████████| 40/40 [02:32<00:00,  3.81s/it]


[ 0.05572075 -0.9124717 ]
0.015705067024739394
5


100%|██████████| 40/40 [02:28<00:00,  3.72s/it]


[-0.670684 -1.      ]
-0.09055884145232063
6


100%|██████████| 40/40 [02:39<00:00,  3.99s/it]


[-0.04526377 -1.        ]
-0.6057511836293661
7


100%|██████████| 40/40 [02:39<00:00,  4.00s/it]


[ 0.24936941 -1.        ]
0.35259789854369283
8


100%|██████████| 40/40 [02:44<00:00,  4.12s/it]


[-0.10727063 -1.        ]
0.13470058656860018
9


100%|██████████| 40/40 [02:35<00:00,  3.90s/it]


[-0.08943292 -0.93621475]
-0.09802428011791875
10


100%|██████████| 40/40 [02:39<00:00,  3.98s/it]


[-0.25766382 -0.6071467 ]
-1.0590784357269867
11


100%|██████████| 40/40 [02:40<00:00,  4.01s/it]


[-0.17039344 -0.9039048 ]
0.08491159604617105
12


100%|██████████| 40/40 [02:40<00:00,  4.00s/it]


[-0.51608884 -0.02955884]
0.013250106665502193
13


100%|██████████| 40/40 [02:44<00:00,  4.11s/it]


[-0.95558965 -0.5357065 ]
-0.31888049148998787
14


100%|██████████| 40/40 [02:41<00:00,  4.03s/it]


[-0.09673512 -0.6846596 ]
-0.00728214461867327
15


100%|██████████| 40/40 [02:36<00:00,  3.92s/it]


[-0.44582462 -0.27450848]
-0.19669709036807648
16


100%|██████████| 40/40 [02:39<00:00,  3.98s/it]


[-0.7745311   0.40421343]
-0.017259513773093538
17


100%|██████████| 40/40 [02:34<00:00,  3.86s/it]


[-0.247797  -0.5061549]
0.04656932404290104
18


100%|██████████| 40/40 [02:50<00:00,  4.27s/it]


[-0.0897052 -1.       ]
-0.0005848610676695287
19


100%|██████████| 40/40 [02:49<00:00,  4.25s/it]


[-0.11027715 -1.        ]
-0.08380517616990768


<Figure size 640x480 with 0 Axes>

In [5]:
# Stack the per-run reward lists into one array per trajectory type
# (index 0: straight runs, index 1: S-shaped runs).
rewards = [np.array(rew_straight), np.array(rew_s)]
# [line colour, band colour] pairs; the third pair is used for the pooled plot.
colors = [["tab:blue","tab:cyan"], ["orangered", "orange"], ["tab:green", "limegreen"]]
name = ["Straight", "S-shaped", "Both"]

# Saving results
# The with-statement closes the file on exit, so no explicit close() is needed.
with open(dr+"/rewards.pickle", 'wb') as rewards_file:
    pickle.dump(rewards, rewards_file)

print(rewards[0].shape, rewards[1].shape)

(15, 40) (5, 40)


## Plots of results

In [6]:
# Plot the per-step mean and standard deviation of the instantaneous reward,
# one figure per trajectory type.
#
# The number of steps is read from the first non-empty reward array instead
# of rewards[0]: a trajectory type that never occurred gives a 1-D empty
# array, for which shape[1] does not exist. The step length comes from
# `decision_dt` rather than a repeated literal.
n_steps = next((r.shape[1] for r in rewards if r.ndim > 1), 0)
time = np.arange(n_steps) * decision_dt
for c in range(len(rewards)):
    col = colors[c]
    # Skip trajectory types without any recorded run.
    if rewards[c].ndim > 1:
        mean_r = np.mean(rewards[c], axis=0)
        std_r = np.std(rewards[c], axis=0)

        fig, ax = plt.subplots()
        # Labels are required: legend() only lists labelled artists.
        ax.fill_between(time, mean_r - std_r, mean_r + std_r, color=col[1], label="Mean ± std")
        ax.plot(time, mean_r, c=col[0], label="Mean")
        ax.set_title(name[c]+" trajectory")
        ax.set_xlabel("Time [s]")
        ax.set_ylabel("Instant reward")
        ax.legend()
        fig.savefig(out_dir / f"rewards_avg_std{c}")
        plt.close(fig)



<Figure size 640x480 with 0 Axes>

In [7]:
# Overlay, for every trajectory type that has recorded runs, the mean and
# standard deviation (across runs) of the cumulative reward over time.
# `time` is the axis built in the preceding plotting cell.
for traj_rewards, traj_colors, traj_label in zip(rewards, colors, name):
    if len(traj_rewards.shape) <= 1:
        # No run of this type was recorded.
        continue
    cumulative = np.cumsum(traj_rewards, axis=1)
    cumulative_mean = np.mean(cumulative, axis=0)
    cumulative_std = np.std(cumulative, axis=0)
    lower_band = cumulative_mean - cumulative_std
    upper_band = cumulative_mean + cumulative_std
    plt.fill_between(time, lower_band, upper_band, color=traj_colors[1])
    plt.plot(time, cumulative_mean, c=traj_colors[0], label=traj_label)

plt.xlabel("Time [s]")
plt.ylabel("Cumulative reward")
plt.legend()
plt.savefig(out_dir / "cumulrewards_avg_std")
plt.clf()

<Figure size 640x480 with 0 Axes>

In [8]:
# For all runs combined: pool the runs of every trajectory type and plot the
# per-step mean and standard deviation of the instantaneous reward.
# `time` is the axis built in the per-trajectory plotting cell.
rewards_combined = np.array([run for runs in rewards for run in runs])
col = colors[2]

mean_r = np.mean(rewards_combined, axis=0)
std_r = np.std(rewards_combined, axis=0)

fig, ax = plt.subplots()
# Labels are required: legend() only lists labelled artists.
ax.fill_between(time, mean_r - std_r, mean_r + std_r, color=col[1], label="Mean ± std")
ax.plot(time, mean_r, c=col[0], label="Mean")
ax.set_title(name[2]+" trajectories")
ax.set_xlabel("Time [s]")
ax.set_ylabel("Instant reward")
ax.legend()
fig.savefig(out_dir / "rewards_avg_std")
plt.close(fig)



<Figure size 640x480 with 0 Axes>