# Animation

In [3]:
import ray
from ray.rllib.algorithms.algorithm import Algorithm
import numpy as np
from custom_env import CustomEnvironment
from config import run_config

def process_observations(observations, current_observation, agent_ids, termination=None):
    """Append one time step of per-agent data to the running ``observations`` log.

    Args:
        observations: dict with the list-valued keys ``"loc_x"``, ``"loc_y"``,
            ``"heading"`` and ``"still_in_the_game"``; it is extended in place.
        current_observation: mapping from agent id to an indexable observation.
            Components 0, 1 and 2 are stored as loc_x, loc_y and heading.
        agent_ids: iterable of agent ids that fixes the order of every row.
        termination: optional mapping from agent id to a truthy "terminated"
            flag. When it is falsy (``None`` or empty) every agent is
            recorded as still in the game.

    Returns:
        The same ``observations`` dict, with one numpy array appended per key.
    """
    def component(index):
        # Agents missing from the current observation are recorded as 0.
        values = []
        for agent in agent_ids:
            if agent in current_observation:
                values.append(current_observation[agent][index])
            else:
                values.append(0)
        return values

    step = {
        "loc_x": component(0),
        "loc_y": component(1),
        "heading": component(2),
    }

    if termination:
        step["still_in_the_game"] = [0 if termination[agent] else 1 for agent in agent_ids]
    else:
        step["still_in_the_game"] = [1 for _ in agent_ids]

    # Everything is computed before the first append, so a failure above
    # leaves the log untouched.
    for name, values in step.items():
        observations[name].append(np.array(values))

    return observations

def collect_observations(path_to_checkpoint, env):
    """Roll out a restored algorithm in ``env`` and record per-step agent data.

    The algorithm is restored from ``path_to_checkpoint`` and stepped for
    ``run_config["env"]["episode_length"]`` steps. Each agent's action comes
    from the ``"prey"`` policy when its ``agent_type`` is 0 and from the
    ``"predator"`` policy otherwise. Only observations returned by
    ``env.step`` are recorded; the one returned by ``env.reset`` is not.

    Args:
        path_to_checkpoint: checkpoint location passed to
            ``Algorithm.from_checkpoint``.
        env: environment to roll out in. It is closed before this function
            returns, and Ray is shut down as well.

    Returns:
        dict with the keys ``"loc_x"``, ``"loc_y"``, ``"heading"`` and
        ``"still_in_the_game"``, each a numpy array stacked over time steps.
        ``loc_x`` and ``loc_y`` are multiplied by ``env.grid_diagonal``.
    """
    # Load the checkpoint
    algo = Algorithm.from_checkpoint(path_to_checkpoint)

    observations = {"loc_x": [], "loc_y": [], "heading": [], "still_in_the_game": []}

    try:
        # Run simulation
        observation, _ = env.reset()
        for _ in range(run_config["env"]["episode_length"]):
            actions = {
                key: algo.compute_single_action(
                    obs_batch, policy_id="prey" if env.agents[key].agent_type == 0 else "predator"
                ) for key, obs_batch in observation.items()
            }

            observation, _, termination, _, _ = env.step(actions)
            observations = process_observations(observations, observation, env._agent_ids, termination)

        # Stack the per-step rows and rescale positions by the grid diagonal.
        grid_diagonal = env.grid_diagonal
        observations["loc_x"] = np.array(observations["loc_x"]) * grid_diagonal
        observations["loc_y"] = np.array(observations["loc_y"]) * grid_diagonal
        observations["heading"] = np.array(observations["heading"])
        observations["still_in_the_game"] = np.array(observations["still_in_the_game"])
    finally:
        # Release the environment and the Ray runtime even when the rollout
        # fails part-way, so a failed run does not leave them open.
        try:
            env.close()
        finally:
            ray.shutdown()

    return observations


# Initialize environment
# Override the configured episode length before building the environment.
# collect_observations reads the same key to decide how many steps to roll out.
run_config["env"]["episode_length"] = 500
env = CustomEnvironment(run_config["env"])

# NOTE(review): absolute, machine-specific checkpoint path — consider making it configurable.
path_to_checkpoint = "/Users/tanguy/ray_results/PPO_2024-01-22_17-10-28/PPO_CustomEnvironment_15f74_00000_0_2024-01-22_17-10-28/checkpoint_000001"
observations = collect_observations(path_to_checkpoint, env)


  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2024-01-22 17:42:25,875	INFO worker.py:1724 -- Started a local Ray instance.
[36m(RolloutWorker pid=31724)[0m   gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[36m(RolloutWorker pid=31732)[0m   

In [4]:
from IPython.display import HTML
from animation import generate_animation_3d

# Build an animation from the recorded rollout, passing fps=10.
# NOTE(review): `env` has already been closed inside collect_observations;
# presumably generate_animation_3d only reads static attributes from it — confirm.
ani = generate_animation_3d(observations, env, fps=10)

# Embed the animation in the notebook as an HTML5 video.
HTML(ani.to_html5_video())

# Metrics

In [None]:
import matplotlib.pyplot as plt
from metrics import calculate_dos

# Path prefix to which a zero-padded index is appended for each checkpoint.
# NOTE(review): the prefix already ends in a full checkpoint name
# ("checkpoint_000105"), so appending three more digits gives e.g.
# "checkpoint_000105000" — confirm this is the intended naming.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
general_path = "/home/jovyan/ray_results/PPO_2024-01-10_09-36-02/PPO_CustomEnvironment_ab3c9_00000_0_2024-01-10_09-36-02/checkpoint_000105"

# One metric value per checkpoint index 0..10.
dos = []
for i in range(11):
    path_to_checkpoint = general_path + str(i).zfill(3)
    # NOTE(review): collect_observations closes `env` and shuts Ray down on every
    # call, yet the same `env` object is reused on each iteration — confirm this is safe.
    observations = collect_observations(path_to_checkpoint, env)
    dos.append(calculate_dos(observations, env.stage_size))

# plot dos curve
plt.plot(dos)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# plot dos curve
# (same plot as the end of the previous cell; `dos` must already exist in the kernel)
plt.plot(dos)
plt.show()

# Brouillon (draft / scratch work)

In [None]:


# Hand-built 27-element observation used to probe the policy in the cells below.
# The slot meanings are the author's own labels; they are not checked against the
# environment's observation layout here — TODO confirm.
toy_vector = np.zeros(27)
toy_vector[3] = 0 #prey  (already 0 from np.zeros; kept for readability)
toy_vector[4] = 20 / env.grid_diagonal #distance, divided by the grid diagonal
toy_vector[5] = 0.5 / (2 * np.pi) #angle, divided by 2*pi
toy_vector[6] = 0.6 / np.pi #orientation, divided by pi
toy_vector[7] = 1 #prey type



In [None]:
# Ask the "prey" policy for an action on the hand-built observation.
# NOTE(review): in the visible cells `algo` is only assigned inside
# collect_observations, so this line relies on leftover kernel state.
algo.compute_single_action(toy_vector, policy_id="prey", full_fetch=True)


In [None]:
# Bare attribute access: displays the attribute itself without calling it.
# NOTE(review): `policy` is first assigned in a later cell, so this only works out of order.
policy.make_model_and_action_dist

In [None]:
import torch

# Manually push the toy observation through the actor encoder MLP and then the
# policy head, bypassing compute_single_action.
# NOTE(review): `algo` is not defined at notebook level in the visible cells.
policy = algo.get_policy("prey")
fc_net_output = policy.model.encoder.actor_encoder.net.mlp(torch.Tensor(toy_vector))

# Raw policy-head outputs as a plain Python list.
outputs = policy.model.pi.net(fc_net_output).tolist()

In [None]:
# Display the model's config object.
policy.model.config

In [None]:
# Rebuild a catalog from the model config's own catalog class, spaces and model config dict.
model_config = policy.model.config
catalog = model_config.catalog_class(model_config.observation_space, model_config.action_space, model_config.model_config_dict)



In [None]:
# Action distribution class the catalog reports for the torch framework.
action_dist_class = catalog.get_action_dist_cls(framework="torch")

# Same manual forward pass as before: actor encoder MLP, then policy head.
fc_net_output = policy.model.encoder.actor_encoder.net.mlp(torch.Tensor(toy_vector))

action_dist_inputs = policy.model.pi.net(fc_net_output)

# Build the distribution from the head outputs and draw one sample.
action_dist = action_dist_class.from_logits(action_dist_inputs)
actions = action_dist.sample().numpy()
actions

In [None]:
# Display the distribution object built in the previous cell.
action_dist

In [None]:
# Display the distribution class returned by the catalog.
action_dist_class

In [None]:
# Display the policy's model object.
policy.model

# Network visualization

In [None]:
# Example shape
# Example shape
# NOTE(review): neither `algo` nor `available_policy_ids` is defined at notebook
# level in the visible cells; this relies on existing kernel state.
print(algo.get_policy(available_policy_ids[0]).get_weights()['pi.net.mlp.0.bias'].shape)
# Build a sub-dictionary holding only the layers of interest: every entry
# whose name mentions neither the critic encoder nor the value head.
actor_weights = {
    name: tensor
    for name, tensor in algo.get_policy(available_policy_ids[0]).get_weights().items()
    if not ("critic_encoder" in name or "vf." in name)
}

actor_weights.keys()

In [None]:
import numpy as np

from graph_tool.all import *

def create_graph(neural_network):
    """Build a layered, directed graph of the actor network in ``neural_network``.

    Args:
        neural_network: mapping from parameter name to array. It must contain
            ``'encoder.actor_encoder.net.mlp.0.weight'``,
            ``'pi.net.mlp.0.weight'`` and ``'pi.net.mlp.0.bias'``. Every key
            containing ``"actor_encoder"`` and ``".bias"`` / ``".weight"`` is
            treated as one hidden layer, in the mapping's iteration order.

    Returns:
        ``(g, pos, v_label, e_width)``: the graph, the vertex position map,
        the vertex label map ("I" input, "H" hidden, "O" output) and the edge
        width map holding the raw, signed connection weight.
    """
    g = Graph(directed=True)

    # Create property maps for vertex and edge labels and edge width
    v_label = g.new_vertex_property("string")
    e_width = g.new_edge_property("double")
    pos = g.new_vertex_property("vector<double>")

    # Largest first-axis length among the weight arrays; each layer is
    # centred vertically against this value.
    max_neurons = max(len(neural_network[key]) for key in neural_network if 'weight' in key)

    def add_layer_and_set_positions(neurons, x_pos, pos):
        # One vertex per entry of ``neurons``, stacked top-down in column ``x_pos``.
        layer_vertices = [g.add_vertex() for _ in neurons]
        starting_y = (max_neurons - len(layer_vertices)) / 2
        for i, v in enumerate(layer_vertices):
            pos[v] = (x_pos, starting_y + len(layer_vertices) - 1 - i)
        return layer_vertices

    ## VERTEX ##
    # Input Layer: one vertex per row of the transposed first weight matrix
    layers = [add_layer_and_set_positions(neural_network['encoder.actor_encoder.net.mlp.0.weight'].T, 0, pos)]

    # Hidden Layers
    x_gap = 20 # gap between layers
    biases_keys = [key for key in neural_network if ".bias" in key and "actor_encoder" in key]
    for i, biases_key in enumerate(biases_keys):
        # Add vertices for the current layer and set their positions
        layers.append(add_layer_and_set_positions(neural_network[biases_key], x_gap*(i+1), pos))

    # Output Layer: placed one gap after the last hidden layer. The original
    # hard-coded x_gap*4, which is only correct for three hidden layers.
    output_neurons = add_layer_and_set_positions(
        neural_network['pi.net.mlp.0.bias'], x_gap*(len(biases_keys)+1), pos
    )

    ## EDGES ##
    # Add edges for input-hidden and hidden-hidden layers
    weights_keys = [key for key in neural_network if ".weight" in key and "actor_encoder" in key]
    for k, weights_key in enumerate(weights_keys):
        # Transposed so that it is indexed [source neuron][target neuron].
        layer_weights = neural_network[weights_key].T
        for i, hidden_neuron in enumerate(layers[k]):
            for j, next_hidden_neuron in enumerate(layers[k+1]):
                e = g.add_edge(hidden_neuron, next_hidden_neuron)
                e_width[e] = layer_weights[i][j]

    # Add edges for hidden-output layer
    output_weights = neural_network['pi.net.mlp.0.weight'].T
    for j, output_neuron in enumerate(output_neurons):
        for i, hidden_neuron in enumerate(layers[-1]):
            e = g.add_edge(hidden_neuron, output_neuron)
            e_width[e] = output_weights[i][j]

    # LABELS
    # Set neuron labels (optional, for clarity)
    for v in layers[0]:
        v_label[v] = "I"
    # Skip layers[0]: labelling every entry of ``layers`` as hidden used to
    # overwrite the "I" labels on the input layer.
    for hidden_neurons in layers[1:]:
        for v in hidden_neurons:
            v_label[v] = "H"
    for v in output_neurons:
        v_label[v] = "O"

    return g, pos, v_label, e_width

# Example usage with the same nn_wandb
# Build the graph from the actor-only weight dictionary.
g, pos, v_label, e_width = create_graph(actor_weights)

# Draw the graph: vertices are labelled with v_label and edge pen width is taken from e_width.
graph_draw(g, pos=pos, vertex_text=v_label, edge_text=None, edge_pen_width=e_width, vertex_size=15, vertex_font_size=10, edge_font_size=10, output_size=(800, 800))
    

In [None]:
import numpy as np

def neural_net_activation(input_vector, neural_dic):
    """Compute a numpy forward pass through the actor weights in ``neural_dic``.

    Hidden layers are read from the keys
    ``encoder.actor_encoder.net.mlp.{0,2,4,...}.weight`` / ``.bias``. The layer
    count is discovered from the keys present rather than fixed at three, so a
    three-layer dictionary gives the same result as before. Every hidden layer
    and the output layer (``pi.net.mlp.0``) computes ``tanh(x @ W.T + b)``.

    NOTE(review): tanh is applied to the output layer as well; whether the real
    policy head does this is not visible here — confirm against the model.

    Args:
        input_vector: array-like input to the first hidden layer.
        neural_dic: mapping from parameter name to numpy array.

    Returns:
        dict with ``"layer_1"`` .. ``"layer_N"`` (hidden activations, in order)
        and ``"output"``.

    Raises:
        KeyError: if the first hidden layer, a bias, or the output layer
            parameters are missing from ``neural_dic``.
    """
    encoder_prefix = "encoder.actor_encoder.net.mlp"

    # Ensuring the input is a numpy array
    layer_input = np.array(input_vector)

    # Layer activations
    activations = {}

    # Keys advance in steps of two (mlp.0, mlp.2, ...). The first layer is
    # mandatory, so a missing one still raises KeyError as before.
    hidden_index = 0
    while hidden_index == 0 or f"{encoder_prefix}.{2 * hidden_index}.weight" in neural_dic:
        weights = neural_dic[f"{encoder_prefix}.{2 * hidden_index}.weight"]
        biases = neural_dic[f"{encoder_prefix}.{2 * hidden_index}.bias"]
        layer_input = np.tanh(np.dot(layer_input, weights.T) + biases)
        hidden_index += 1
        activations[f"layer_{hidden_index}"] = layer_input

    # Output layer, fed by the last hidden activation
    z = np.dot(layer_input, neural_dic["pi.net.mlp.0.weight"].T) + neural_dic["pi.net.mlp.0.bias"]
    activations["output"] = np.tanh(z)

    return activations

# Arbitrary 27-value input used to compare the numpy forward pass with the policy.
toy_vector = [0.95600354, 0.36926809,
 0.07873756, 0.38905384, 0.92927526, 0.08713003, 0.61284082,
 0.82746801, 0.36660529, 0.89503505, 0.04707359, 0.97744959,
 0.28597701, 0.96454964, 0.29067754, 0.74090134, 0.8245886, 0.96454964, 0.29067754, 0.74090134, 0.8245886,
             0.96454964, 0.29067754, 0.74090134, 0.8245886, 0.96454964, 0.29067754]

# Run the numpy re-implementation on the actor-only weights.
activations = neural_net_activation(toy_vector, actor_weights)

# Display the output-layer activation.
activations["output"]

In [None]:
# Query the "prey" policy on the same toy vector, to compare with the numpy forward pass above.
algo.compute_single_action(toy_vector, policy_id="prey", full_fetch=True)

In [None]:
# Same query as the previous cell, without full_fetch.
algo.compute_single_action(toy_vector, policy_id="prey")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interactive, FloatSlider, VBox, HBox
from IPython.display import display

def update_plot_observation(r, theta):
    """Draw the point with polar coordinates ``(r, theta)`` as a red dot.

    The plot uses figure 1 with both axes fixed to [-10, 10] and thin black
    lines through the origin.
    """
    point_x, point_y = r * np.cos(theta), r * np.sin(theta)

    axes = plt.figure(1).gca()
    axes.plot(point_x, point_y, 'ro')  # Red point at the polar coordinates
    axes.set_xlim(-10, 10)
    axes.set_ylim(-10, 10)
    axes.axhline(0, color='black', linewidth=0.5)
    axes.axvline(0, color='black', linewidth=0.5)

    plt.show()

# Function to update the plot
def update_plot_actions(r, theta):
    """Plot two bell curves built from the prey policy's outputs for ``(r, theta)``.

    A 27-element probe observation is filled from ``r`` and ``theta``, pushed
    through the prey policy's actor encoder and policy head, and the four
    resulting numbers are read as ``mean1, std1, mean2, std2``.

    NOTE(review): the head outputs are used as-is, in that order. Whether the
    head really emits (mean, std) pairs interleaved this way, rather than e.g.
    means followed by log-stds, is not visible here — confirm.
    NOTE(review): relies on `algo`, `env` and `torch` already being in the kernel.
    """
    probe = np.zeros(27)
    probe[3] = 0  # prey
    probe[4] = r / env.grid_diagonal  # distance
    probe[5] = theta / (2 * np.pi)  # angle
    probe[6] = theta / np.pi  # orientation
    probe[7] = 1  # prey type

    prey_policy = algo.get_policy("prey")
    encoded = prey_policy.model.encoder.actor_encoder.net.mlp(torch.Tensor(probe))
    mean1, std1, mean2, std2 = prey_policy.model.pi.net(encoded).tolist()

    def bell_curve(points, mean, std):
        # Normal density evaluated at ``points``.
        return (1 / (np.sqrt(2 * np.pi) * std)) * np.exp(-0.5 * ((points - mean) / std)**2)

    # Generate points on the x axis
    grid = np.linspace(-10, 10, 1000)
    curves = (
        (bell_curve(grid, mean1, std1), 'blue', '-', 'amplitude'),
        (bell_curve(grid, mean2, std2), 'red', '--', 'orientation'),
    )

    # Plotting with specific styling
    plt.figure(1)
    for density, colour, dash, name in curves:
        plt.plot(grid, density, color=colour, linestyle=dash, linewidth=2, label=name)
    plt.grid(True)
    plt.legend()
    plt.show()
    
# Interactive sliders for radius and angle
# Two vertical sliders: radius in [0, 10] and angle in [-pi, pi], both with step 0.1.
radius_slider = FloatSlider(min=0, max=10, step=0.1, value=5, description='Radius (r)', orientation='vertical')
theta_slider = FloatSlider(min=-np.pi, max=np.pi, step=0.1, value=np.pi/4, description='Angle (θ)', orientation='vertical')

# Put the two sliders side by side in an HBox
slider_box = HBox([radius_slider, theta_slider])

# Create the interactive plots; both are driven by the same pair of sliders
interactive_plot1 = interactive(update_plot_observation, r=radius_slider, theta=theta_slider)
interactive_plot2 = interactive(update_plot_actions, r=radius_slider, theta=theta_slider)

# Take the last child of each interactive (presumably its output area — confirm)
output1 = interactive_plot1.children[-1]
output2 = interactive_plot2.children[-1]

# Display the sliders and both outputs in one row using HBox
display(HBox([slider_box, output1, output2]))