# AD2C : Test Bed

This Jupter Notebook aims to Experiment with different Component of AD2C framework Like task, ESC, Callback and Loggers. This will also include advance experimental setup to test the trainied model. 


----


## Imports

In [1]:
import sys
import os
import hydra
import wandb
import sys
import hydra
from hydra.core.hydra_config import HydraConfig
from hydra.core.global_hydra import GlobalHydra
from omegaconf import DictConfig, OmegaConf

# Benchmarl & Project Imports
import benchmarl.models
from benchmarl.algorithms import *
from benchmarl.environments import VmasTask
from benchmarl.experiment import Experiment
from benchmarl.hydra_config import (
    load_algorithm_config_from_hydra,
    load_experiment_config_from_hydra,
    load_task_config_from_hydra,
    load_model_config_from_hydra,
)

# Custom Callbacks
from het_control.callback import *
from het_control.environments.vmas import render_callback
from het_control.models.het_control_mlp_empirical import HetControlMlpEmpiricalConfig
from het_control.callbacks.esc_callback import ExtremumSeekingController
from het_control.callbacks.sndESLogger import TrajectorySNDLoggerCallback

import numpy as np
import torch
import matplotlib.pyplot as plt
import networkx as nx
import wandb
from tensordict import TensorDict
from typing import List
from benchmarl.experiment.callback import Callback



## SND Ploting Callback
This allow us to display plots for the snd. from the eval run. 

---

In [15]:
def generate_snd_visualizations(snd_matrix, n_agents, step_count):
    """
    Generates 3 matplotlib figures (Heatmap, Bar, Graph) for a given SND matrix.
    Returns a dictionary of wandb.Image objects.
    Assumes snd_matrix is an N x N Symmetric Distance Matrix.
    """
    plots = {}
    
    # Define pairs (Upper triangle only, since matrix is symmetric: 1-2 is same as 2-1)
    pairs = [(i, j) for i in range(n_agents) for j in range(i + 1, n_agents)]
    
    # ==========================================
    # 1. HEATMAP with Cell Values
    # ==========================================
    fig_heat, ax_heat = plt.subplots(figsize=(6, 5))
    im = ax_heat.imshow(snd_matrix, cmap='viridis', interpolation='nearest', vmin=0, vmax=3)
    
    ax_heat.set_title(f'SND Matrix (Heatmap) - Step {step_count}')
    ax_heat.set_xlabel('Agent Index')
    ax_heat.set_ylabel('Agent Index')
    
    # Set ticks to be integers (Agent 0, Agent 1...)
    ax_heat.set_xticks(np.arange(n_agents))
    ax_heat.set_yticks(np.arange(n_agents))
    
    fig_heat.colorbar(im, ax=ax_heat, label='Distance')
    
    # Loop over data dimensions and create text annotations.
    for i in range(n_agents):
        for j in range(n_agents):
            val = snd_matrix[i, j]
            
            # Text color logic: White for dark background (low values), Black for light (high values)
            # Viridis: Low values (purple) -> White text. High values (yellow) -> Black text.
            # Scale is 0-3. Midpoint roughly 1.5.
            text_color = "white" if val < 1.0 else "black"
            
            # Print value centered in the cell
            ax_heat.text(j, i, f"{val:.2f}",
                         ha="center", va="center", color=text_color, 
                         fontsize=8, fontweight='bold')

    plt.tight_layout()
    plots["Visuals/SND_Heatmap"] = wandb.Image(fig_heat)
    plt.close(fig_heat)

    # ==========================================
    # 2. BAR CHART (Pairwise Values)
    # ==========================================
    if len(pairs) > 0:
        # Extract values for the unique pairs (upper triangle)
        pair_values = [snd_matrix[p[0], p[1]] for p in pairs]
        pair_labels = [f"A{p[0]}-A{p[1]}" for p in pairs]
        
        fig_bar, ax_bar = plt.subplots(figsize=(8, 5))
        bars = ax_bar.bar(pair_labels, pair_values, color='teal')
        
        ax_bar.set_title(f'Pairwise Distances - Step {step_count}')
        ax_bar.set_ylabel('Distance')
        ax_bar.set_ylim(0, 3) 
        ax_bar.tick_params(axis='x', rotation=45)
        
        # Add values on top of bars
        ax_bar.bar_label(bars, fmt='%.2f', padding=3)

        plt.tight_layout()
        plots["Visuals/SND_BarChart"] = wandb.Image(fig_bar)
        plt.close(fig_bar)

    # ==========================================
    # 3. NETWORK GRAPH (Topology)
    # ==========================================
    if len(pairs) > 0:
        fig_graph = plt.figure(figsize=(7, 7))
        G = nx.Graph()
        
        # Add edges for unique pairs
        for u, v in pairs:
            dist = snd_matrix[u, v]
            G.add_edge(u, v, weight=dist)

        pos = nx.spring_layout(G, seed=42)
        weights = [G[u][v]['weight'] for u, v in G.edges()]
        
        # Draw Nodes
        nx.draw_networkx_nodes(G, pos, node_size=600, node_color='lightblue')
        nx.draw_networkx_labels(G, pos, font_weight='bold')
        
        # Draw Edges
        edges = nx.draw_networkx_edges(G, pos, 
                               edge_color=weights, 
                               edge_cmap=plt.cm.viridis, 
                               width=2,
                               edge_vmin=0,
                               edge_vmax=3)
        
        # Draw Edge Labels (The distance values on the lines)
        edge_labels = {
            (u, v): f"{d['weight']:.2f}" 
            for u, v, d in G.edges(data=True)
        }
        nx.draw_networkx_edge_labels(
            G, pos, 
            edge_labels=edge_labels, 
            font_color='black', 
            font_size=8,
            font_weight='bold'
        )
        
        plt.colorbar(edges, label='Distance')
        plt.title(f'Interaction Graph - Step {step_count}')
        plt.axis('off')
        plots["Visuals/SND_NetworkGraph"] = wandb.Image(fig_graph)
        plt.close(fig_graph)

    return plots

In [16]:
class SNDVisualizerCallback(Callback):
    """
    A visualization-only callback that computes the SND (Behavioral Distance) matrix
    at evaluation time and logs Heatmap, Bar Chart, and Graph visualizations to WandB.
    """
    def __init__(self):
        super().__init__()
        self.control_group = None
        self.model = None

    def on_setup(self):
        """Auto-detects the agent group and initializes the model wrapper."""
        if not self.experiment.group_policies:
            print("\nWARNING: No group policies found. SND Visualizer disabled.\n")
            return

        # Auto-detect: Simply grab the first available control group
        self.control_group = list(self.experiment.group_policies.keys())[0]
        
        policy = self.experiment.group_policies[self.control_group]
        
        # We assume 'get_het_model' is available in your scope
        self.model = get_het_model(policy)

        if self.model is None:
             print(f"\nWARNING: Could not extract HetModel for group '{self.control_group}'. Visualizer disabled.\n")

    def _get_agent_actions_for_rollout(self, rollout):
        """Helper to run the forward pass and get actions for SND computation."""
        obs = rollout.get((self.control_group, "observation"))
        actions = []
        for i in range(self.model.n_agents):
            temp_td = TensorDict(
                {(self.control_group, "observation"): obs},
                batch_size=obs.shape[:-1]
            )
            # Ensure _forward exists and returns a TensorDict with the output key
            action_td = self.model._forward(temp_td, agent_index=i, compute_estimate=False)
            actions.append(action_td.get(self.model.out_key))
        return actions

    def on_evaluation_end(self, rollouts: List[TensorDict]):
        """Runs at the end of evaluation to compute SND and log plots."""
        if self.model is None:
            return

        logs_to_push = {}
        first_rollout_snd_matrix = None

        with torch.no_grad():
            for i, r in enumerate(rollouts):
                # We only need the matrix from the first rollout for clean visualization
                if i > 0: 
                    break

                agent_actions = self._get_agent_actions_for_rollout(r)
                
                # Compute behavioral distance (Assumed to be available in scope)
                # Must return N x N matrix
                pairwise_distances_tensor = compute_behavioral_distance(agent_actions, just_mean=False)
                
                # If the function returns (Time x N x N), average over Time to get (N x N)
                if pairwise_distances_tensor.ndim > 2:
                    pairwise_distances_tensor = pairwise_distances_tensor.mean(dim=0)

                first_rollout_snd_matrix = pairwise_distances_tensor.cpu().numpy()

        # Generate and Log Visualizations
        if first_rollout_snd_matrix is not None:
            visual_logs = generate_snd_visualizations(
                snd_matrix=first_rollout_snd_matrix, 
                n_agents=self.model.n_agents,
                step_count=self.experiment.n_iters_performed
            )
            logs_to_push.update(visual_logs)
            
            # Update the logger
            self.experiment.logger.log(logs_to_push, step=self.experiment.n_iters_performed)

## Env Setup

This code block compiles all the different section of MARl env together to Run the experiment

---

In [17]:
def setup(task_name):
    benchmarl.models.model_config_registry.update(
        {
            "hetcontrolmlpempirical": HetControlMlpEmpiricalConfig,
        }
    )
    if task_name == "vmas/navigation":
        # Set the render callback for the navigation case study
        VmasTask.render_callback = render_callback

def get_experiment(cfg: DictConfig) -> Experiment:
    hydra_choices = HydraConfig.get().runtime.choices
    task_name = hydra_choices.task
    algorithm_name = hydra_choices.algorithm

    setup(task_name)

    print(f"\nAlgorithm: {algorithm_name}, Task: {task_name}")
    # print("\nLoaded config:\n") # Optional: Commented out to reduce clutter
    # print(OmegaConf.to_yaml(cfg))

    algorithm_config = load_algorithm_config_from_hydra(cfg.algorithm)
    experiment_config = load_experiment_config_from_hydra(cfg.experiment)
    task_config = load_task_config_from_hydra(cfg.task, task_name)
    critic_model_config = load_model_config_from_hydra(cfg.critic_model)
    model_config = load_model_config_from_hydra(cfg.model)

    if isinstance(algorithm_config, (MappoConfig, IppoConfig, MasacConfig, IsacConfig)):
        model_config.probabilistic = True
        model_config.scale_mapping = algorithm_config.scale_mapping
        algorithm_config.scale_mapping = (
            "relu"  # The scaling of std_dev will be done in the model
        )
    else:
        model_config.probabilistic = False

    experiment = Experiment(
        task=task_config,
        algorithm_config=algorithm_config,
        model_config=model_config,
        critic_model_config=critic_model_config,
        seed=cfg.seed,
        config=experiment_config,
        callbacks=[
            SndCallback(),
            SNDVisualizerCallback(),
            # ExtremumSeekingController(
            #             control_group="agents",
            #             # initial_snd=0.0,
            #             dither_magnitude=0.1,
            #             dither_frequency_rad_s=1.0,
            #             integral_gain=-0.01,
            #             high_pass_cutoff_rad_s=1.0,
            #             low_pass_cutoff_rad_s=1.0,
            #             sampling_period=1.0
            # ),
            # TrajectorySNDLoggerCallback(control_group="agents"),
            NormLoggerCallback(),
            ActionSpaceLoss(
                use_action_loss=cfg.use_action_loss, action_loss_lr=cfg.action_loss_lr
            ),
        ]
        + (
            [
                TagCurriculum(
                    cfg.simple_tag_freeze_policy_after_frames,
                    cfg.simple_tag_freeze_policy,
                )
            ]
            if task_name == "vmas/simple_tag"
            else []
        ),
    )
    return experiment

## Training Code

Trains the model for 200 episodes. 

In [None]:

ABS_CONFIG_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/AD2C/ControllingBehavioralDiversity/het_control/conf"
CONFIG_NAME = "navigation_ippo"  # Make sure 'navigation_ippo.yaml' exists in the folder above!
SAVE_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/model_checkpoints/navigation_ippo_esc/"

save_interval = 600000
desired_snd = 0.5
max_frame = 6000000

if not os.path.exists(SAVE_PATH):
    print(f"Creating missing directory: {SAVE_PATH}")
    os.makedirs(SAVE_PATH, exist_ok=True)

GlobalHydra.instance().clear()

sys.argv = [
    "dummy.py",
    f"model.desired_snd={desired_snd}",
    f"experiment.max_n_frames={max_frame}",
    f"experiment.checkpoint_interval={save_interval}",
    f"experiment.save_folder={SAVE_PATH}", # FIXED: Removed space after '='
]

# 3. Define the Hydra wrapper
@hydra.main(version_base=None, config_path=ABS_CONFIG_PATH, config_name=CONFIG_NAME)
def hydra_experiment(cfg: DictConfig) -> None:
    print(f"Config loaded from: {ABS_CONFIG_PATH}")
    if wandb.run is not None:
        print("Finishing previous WandB run...")
        wandb.finish()

    
    print(f"Running with SND: {cfg.model.desired_snd}")
    
    experiment = get_experiment(cfg=cfg)
    experiment.run()
    wandb.finish()

# 4. Execute safely
if __name__ == "__main__":
    try:
        hydra_experiment()
    except SystemExit:
        print("Experiment finished successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")

Config loaded from: /home/grad/doc/2027/spatel2/AD2C_testBed/AD2C/ControllingBehavioralDiversity/het_control/conf
Running with SND: 0.0

Algorithm: ippo, Task: vmas/navigation






mean return = -0.13405990600585938:   1%|          | 1/100 [00:37<1:01:03, 37.00s/it]

mean return = 0.1228489950299263:   2%|▏         | 2/100 [01:13<59:29, 36.42s/it]    

mean return = 0.3207175135612488:   3%|▎         | 3/100 [01:45<55:54, 34.58s/it]

mean return = 0.483396053314209:   4%|▍         | 4/100 [02:20<55:43, 34.82s/it] 

mean return = 0.5501216650009155:   5%|▌         | 5/100 [02:56<55:37, 35.14s/it]

mean return = 0.5974336862564087:   6%|▌         | 6/100 [03:28<53:42, 34.29s/it]

mean return = 0.6486825942993164:   7%|▋         | 7/100 [04:00<51:35, 33.29s/it]

mean return = 0.6349563002586365:   8%|▊         | 8/100 [04:40<54:30, 35.55s/it]

mean return = 0.6514759659767151:   9%|▉         | 9/100 [05:18<55:11, 36.39s/it]

mean return = 0.6742706298828125:  10%|█         | 10/100 [05:59<56:30, 37.67s/it]

mean return = 0.6776625514030457:  11%|█         | 11/100 [06:40<57:31, 38.78s/it]

mean return = 0.692574679851532:  12%|█▏        | 12/100 [07:24<59:04, 40.28s/it] 

mean return = 0.6705374121665955:  13%|█▎        | 13/100 [08:04<58:20, 40.23s/it]

mean return = 0.7104434370994568:  14%|█▍        | 14/100 [08:38<54:55, 38.32s/it]

mean return = 0.7100940346717834:  15%|█▌        | 15/100 [09:11<51:56, 36.66s/it]

mean return = 0.7040904760360718:  16%|█▌        | 16/100 [09:53<53:53, 38.49s/it]

mean return = 0.7041628956794739:  17%|█▋        | 17/100 [10:48<59:44, 43.18s/it]

mean return = 0.7013053297996521:  18%|█▊        | 18/100 [11:43<1:03:52, 46.73s/it]

mean return = 0.700619101524353:  19%|█▉        | 19/100 [12:37<1:06:08, 49.00s/it] 

mean return = 0.6833057999610901:  20%|██        | 20/100 [13:38<1:10:21, 52.77s/it]

mean return = 0.7155725955963135:  21%|██        | 21/100 [14:42<1:13:56, 56.15s/it]

mean return = 0.7192031145095825:  22%|██▏       | 22/100 [15:25<1:07:38, 52.03s/it]

mean return = 0.6908704042434692:  23%|██▎       | 23/100 [16:04<1:01:49, 48.18s/it]

mean return = 0.7112061977386475:  24%|██▍       | 24/100 [16:43<57:32, 45.43s/it]  

mean return = 0.6932700276374817:  25%|██▌       | 25/100 [17:19<53:04, 42.46s/it]

mean return = 0.7174630761146545:  26%|██▌       | 26/100 [17:55<50:04, 40.60s/it]

mean return = 0.7173253297805786:  27%|██▋       | 27/100 [18:28<46:42, 38.39s/it]

mean return = 0.7120981812477112:  28%|██▊       | 28/100 [19:01<44:02, 36.70s/it]

mean return = 0.7335034012794495:  29%|██▉       | 29/100 [19:34<42:10, 35.64s/it]

mean return = 0.7077249884605408:  30%|███       | 30/100 [20:07<40:42, 34.89s/it]

mean return = 0.7044774293899536:  31%|███       | 31/100 [20:41<39:41, 34.51s/it]

mean return = 0.7408342957496643:  32%|███▏      | 32/100 [21:16<39:12, 34.60s/it]

mean return = 0.7172651290893555:  33%|███▎      | 33/100 [21:51<38:52, 34.82s/it]

mean return = 0.70211261510849:  34%|███▍      | 34/100 [22:24<37:49, 34.38s/it]  

mean return = 0.7255316972732544:  35%|███▌      | 35/100 [22:58<37:04, 34.23s/it]

mean return = 0.7099975943565369:  36%|███▌      | 36/100 [23:33<36:42, 34.41s/it]

mean return = 0.7077577114105225:  37%|███▋      | 37/100 [24:15<38:41, 36.85s/it]

mean return = 0.7307745814323425:  38%|███▊      | 38/100 [24:58<39:41, 38.41s/it]

mean return = 0.7053977847099304:  39%|███▉      | 39/100 [25:41<40:31, 39.87s/it]

mean return = 0.7185912728309631:  40%|████      | 40/100 [26:22<40:07, 40.13s/it]

mean return = 0.7062655091285706:  41%|████      | 41/100 [27:02<39:27, 40.13s/it]

mean return = 0.6960562467575073:  42%|████▏     | 42/100 [27:40<38:21, 39.68s/it]

mean return = 0.6927105784416199:  43%|████▎     | 43/100 [28:18<37:06, 39.06s/it]

mean return = 0.729582667350769:  44%|████▍     | 44/100 [28:59<37:02, 39.69s/it] 

mean return = 0.7200937271118164:  45%|████▌     | 45/100 [29:40<36:41, 40.03s/it]

mean return = 0.713090717792511:  46%|████▌     | 46/100 [30:17<35:10, 39.09s/it] 

mean return = 0.6985776424407959:  47%|████▋     | 47/100 [30:59<35:25, 40.11s/it]

mean return = 0.7288970947265625:  48%|████▊     | 48/100 [31:41<35:10, 40.58s/it]

mean return = 0.7174538373947144:  49%|████▉     | 49/100 [32:21<34:16, 40.32s/it]

mean return = 0.7061137557029724:  50%|█████     | 50/100 [32:57<32:42, 39.24s/it]

mean return = 0.7091783285140991:  51%|█████     | 51/100 [33:33<31:08, 38.14s/it]

mean return = 0.724118709564209:  52%|█████▏    | 52/100 [34:05<29:07, 36.40s/it] 

mean return = 0.7250441908836365:  53%|█████▎    | 53/100 [34:37<27:22, 34.95s/it]

mean return = 0.7095404267311096:  54%|█████▍    | 54/100 [35:10<26:16, 34.28s/it]

mean return = 0.7219471335411072:  55%|█████▌    | 55/100 [35:43<25:26, 33.92s/it]

mean return = 0.714860737323761:  56%|█████▌    | 56/100 [36:17<24:52, 33.92s/it] 

mean return = 0.7411220669746399:  57%|█████▋    | 57/100 [36:50<24:09, 33.70s/it]

mean return = 0.727149248123169:  58%|█████▊    | 58/100 [37:23<23:33, 33.66s/it] 

mean return = 0.7253270745277405:  59%|█████▉    | 59/100 [37:55<22:35, 33.07s/it]

mean return = 0.714784562587738:  60%|██████    | 60/100 [38:27<21:43, 32.60s/it] 

mean return = 0.7304054498672485:  61%|██████    | 61/100 [39:00<21:17, 32.76s/it]

mean return = 0.7050451636314392:  62%|██████▏   | 62/100 [39:33<20:56, 33.06s/it]

mean return = 0.7498284578323364:  63%|██████▎   | 63/100 [40:06<20:14, 32.82s/it]

mean return = 0.69477379322052:  64%|██████▍   | 64/100 [40:39<19:44, 32.90s/it]  

mean return = 0.7069483399391174:  65%|██████▌   | 65/100 [41:12<19:13, 32.96s/it]

mean return = 0.7098681926727295:  66%|██████▌   | 66/100 [41:44<18:32, 32.71s/it]

mean return = 0.7104485630989075:  67%|██████▋   | 67/100 [42:16<17:50, 32.45s/it]

mean return = 0.7182338833808899:  68%|██████▊   | 68/100 [42:50<17:34, 32.95s/it]

mean return = 0.7405645847320557:  69%|██████▉   | 69/100 [43:22<16:55, 32.76s/it]

mean return = 0.7255043387413025:  70%|███████   | 70/100 [43:54<16:17, 32.57s/it]

mean return = 0.7202451229095459:  71%|███████   | 71/100 [44:27<15:44, 32.58s/it]

mean return = 0.7078493237495422:  72%|███████▏  | 72/100 [44:59<15:11, 32.54s/it]

mean return = 0.7191117405891418:  73%|███████▎  | 73/100 [45:28<14:05, 31.31s/it]

mean return = 0.7220961451530457:  74%|███████▍  | 74/100 [46:01<13:44, 31.71s/it]

mean return = 0.7151443362236023:  75%|███████▌  | 75/100 [46:34<13:23, 32.14s/it]

mean return = 0.716367244720459:  76%|███████▌  | 76/100 [47:05<12:48, 32.01s/it] 

mean return = 0.7388648986816406:  77%|███████▋  | 77/100 [47:38<12:17, 32.07s/it]

mean return = 0.7224564552307129:  78%|███████▊  | 78/100 [48:10<11:47, 32.17s/it]

mean return = 0.7197166085243225:  79%|███████▉  | 79/100 [48:42<11:14, 32.12s/it]

mean return = 0.7215009331703186:  80%|████████  | 80/100 [49:15<10:49, 32.47s/it]

mean return = 0.698326051235199:  81%|████████  | 81/100 [49:48<10:19, 32.61s/it] 

mean return = 0.7179234623908997:  82%|████████▏ | 82/100 [50:16<09:23, 31.28s/it]

mean return = 0.7383303642272949:  83%|████████▎ | 83/100 [50:49<08:56, 31.55s/it]

mean return = 0.7213178277015686:  84%|████████▍ | 84/100 [51:21<08:29, 31.82s/it]

mean return = 0.7097606658935547:  85%|████████▌ | 85/100 [51:54<08:01, 32.12s/it]

mean return = 0.7161166071891785:  86%|████████▌ | 86/100 [52:26<07:30, 32.15s/it]

mean return = 0.7327982783317566:  87%|████████▋ | 87/100 [52:58<06:55, 31.96s/it]

mean return = 0.71244877576828:  88%|████████▊ | 88/100 [53:35<06:42, 33.51s/it]  

mean return = 0.7179911732673645:  89%|████████▉ | 89/100 [54:11<06:17, 34.33s/it]

mean return = 0.7058972120285034:  90%|█████████ | 90/100 [54:46<05:46, 34.67s/it]

mean return = 0.7085570096969604:  91%|█████████ | 91/100 [55:12<04:48, 32.07s/it]

mean return = 0.7230396270751953:  92%|█████████▏| 92/100 [55:45<04:18, 32.33s/it]

mean return = 0.7299076318740845:  93%|█████████▎| 93/100 [56:19<03:48, 32.67s/it]

mean return = 0.7372070550918579:  94%|█████████▍| 94/100 [56:50<03:13, 32.28s/it]

mean return = 0.7239170670509338:  95%|█████████▌| 95/100 [57:24<02:43, 32.70s/it]

mean return = 0.7053519487380981:  96%|█████████▌| 96/100 [57:58<02:12, 33.12s/it]

mean return = 0.7100776433944702:  97%|█████████▋| 97/100 [58:32<01:39, 33.24s/it]

mean return = 0.7489469647407532:  98%|█████████▊| 98/100 [59:07<01:07, 33.88s/it]

mean return = 0.7270373702049255:  99%|█████████▉| 99/100 [59:40<00:33, 33.72s/it]

mean return = 0.7254847288131714: 100%|██████████| 100/100 [1:00:12<00:00, 33.11s/it]

0,1
collection/agents/info/agent_collisions,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
collection/agents/info/final_rew,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
collection/agents/info/pos_rew,▁▃▆▆▇▇█▇███▇█████▇██████▇██████▇██▇█████
collection/agents/logits,██▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
collection/agents/observation,▁▃▅▅▇▆█▆▇█▆▆▇▆▇▇▇▆▇▇▇▆█▆▆▇▇▆▅▇▇▆█▇▇▇█▇▇█
collection/agents/out_loc_norm,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
collection/agents/reward/episode_reward_max,▁▃▅▆▇▇▇▇▇▇▆▇█▇▇▆█▆▆▇▇▇▇▇█▆▇▇▆▇▇▇▆▇█▇▇█▇▇
collection/agents/reward/episode_reward_mean,▁▆▇▇▇███████████████████████████████████
collection/agents/reward/episode_reward_min,█▇▆▄▄▅▄▃▃▄▃▂▄▄▄▃▄▄▄▄▂▂▄▄▄▄▂▂▂▄▃▄▄▄▃▄▁▂▄▃
collection/agents/reward/reward_max,▁▅▇▆█▅▅▅▇▅▄▅▅▅▄▄▄▃▃▂▃▄▄▅▃▂▂▃▃▂▃▂▂▂▁▂▂▁▂▁

0,1
collection/agents/estimated_snd,
collection/agents/info/agent_collisions,0
collection/agents/info/final_rew,0
collection/agents/info/pos_rew,0.00733
collection/agents/logits,0.05807
collection/agents/observation,0.00992
collection/agents/out_loc_norm,0
collection/agents/reward/episode_reward_max,2.47983
collection/agents/reward/episode_reward_mean,0.72548
collection/agents/reward/episode_reward_min,-1.85222


mean return = 0.7254847288131714: 100%|██████████| 100/100 [1:00:13<00:00, 36.13s/it]


## Eval Run
---
Single Step eval. from the check point

In [18]:
# CONFIGURATION
import shutil


ABS_CONFIG_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/AD2C/ControllingBehavioralDiversity/het_control/conf"
CONFIG_NAME = "navigation_ippo"



In [19]:
import time

# Your checkpoint path
CHECKPOINT_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/saved_models/snd00.pt"

unique_id = f"AD2C_Eval_{int(time.time())}"

# FORCE WandB to use this ID, ignoring whatever is in your YAML/Hydra config
os.environ["WANDB_RUN_ID"] = unique_id
os.environ["WANDB_NAME"] = unique_id


# ==========================================
# EVALUATION LOGIC
# ==========================================
GlobalHydra.instance().clear()

sys.argv = [
    "eval_script.py",
    f"experiment.restore_file={CHECKPOINT_PATH}",
    "experiment.evaluation_episodes=10",
    "experiment.render=True",
    "experiment.evaluation_deterministic_actions=True",
    "experiment.save_folder=null",
    "model.desired_snd=0.3" ,
    f"+experiment.name={unique_id}"
]

@hydra.main(version_base=None, config_path=ABS_CONFIG_PATH, config_name=CONFIG_NAME)
def eval_experiment(cfg: DictConfig) -> None:
    print(f"Loading model from: {cfg.experiment.restore_file}")
    print(f"Initializing model with dummy SND: {cfg.model.desired_snd}")
    
    experiment = get_experiment(cfg=cfg)
    
    print("Model loaded. Starting Evaluation...")
    
    experiment._evaluation_loop()
    
    print("Evaluation Complete.")
    experiment.close()
    
if __name__ == "__main__":
    try:
        eval_experiment()
    except SystemExit:
        pass
    except Exception as e:
        print(f"An error occurred: {e}")

Exception ignored in: <function tqdm.__del__ at 0x75541890a8b0>
Traceback (most recent call last):
  File "/home/grad/doc/2027/spatel2/miniconda3/envs/ad2c/lib/python3.9/site-packages/tqdm/std.py", line 1148, in __del__
    self.close()
  File "/home/grad/doc/2027/spatel2/miniconda3/envs/ad2c/lib/python3.9/site-packages/tqdm/notebook.py", line 279, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


Loading model from: /home/grad/doc/2027/spatel2/AD2C_testBed/saved_models/snd00.pt
Initializing model with dummy SND: 0.3

Algorithm: ippo, Task: vmas/navigation


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
[34m[1mwandb[0m: Currently logged in as: [33msvarp[0m ([33msvarp-university-of-massachusetts-lowell[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Error executing job with overrides: ['experiment.restore_file=/home/grad/doc/2027/spatel2/AD2C_testBed/saved_models/snd00.pt', 'experiment.evaluation_episodes=10', 'experiment.render=True', 'experiment.evaluation_deterministic_actions=True', 'experiment.save_folder=null', 'model.desired_snd=0.3', '+experiment.name=AD2C_Eval_1764192012']
Traceback (most recent call last):
  File "/home/grad/doc/2027/spatel2/miniconda3/envs/ad2c/lib/python3.9/asyncio/locks.py", line 226, in wait
    await fut
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/grad/doc/2027/spatel2/miniconda3/envs/ad2c/lib/python3.9/asyncio/tasks.py", line 490, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/grad/doc/2027/spatel2/miniconda3/envs/ad2c/lib/python3.9/site-packages/wand

In [11]:
# !wandb login --relogin b3c2b62655aa322e8ab1d1ab07287749ce03ff8

## Testing Env with different task config
---

Changing the task config to check the adaptibility of learned policy. 

In [None]:
CHECKPOINT_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/saved_models/model2.pt"

# ==========================================
# EVALUATION LOGIC
# ==========================================
GlobalHydra.instance().clear()

sys.argv = [
    "eval_script.py",
    f"experiment.restore_file={CHECKPOINT_PATH}",
    "experiment.evaluation_episodes=10",
    "experiment.render=True",
    "experiment.evaluation_deterministic_actions=True",
    "experiment.save_folder=null",
    "model.desired_snd=0.3",
    
    "task.agents_with_same_goal=1" 
]

@hydra.main(version_base=None, config_path=ABS_CONFIG_PATH, config_name=CONFIG_NAME)
def eval_experiment(cfg: DictConfig) -> None:
    print(f"Loading model from: {cfg.experiment.restore_file}")
    
    print(f"Evaluation Setup: {cfg.task.n_agents} Agents, {cfg.task.agents_with_same_goal} per goal.")
    
    experiment = get_experiment(cfg=cfg)
    print("Starting Evaluation...")
    experiment._evaluation_loop()
    print("Evaluation Complete.")
    experiment.close()

if __name__ == "__main__":
    try:
        eval_experiment()
    except SystemExit:
        pass
    except Exception as e:
        print(f"An error occurred: {e}")

Loading model from: /home/grad/doc/2027/spatel2/AD2C_testBed/model_checkpoints/navigation_ippo_esc/ippo_navigation_hetcontrolmlpempirical__8abb18cb_25_11_21-18_17_10/checkpoints/checkpoint_12000000.pt
Evaluation Setup: 3 Agents, 1 per goal.

Algorithm: ippo, Task: vmas/navigation


Starting Evaluation...




Evaluation Complete.




0,1
eval/agents/reward/episode_reward_max,▁
eval/agents/reward/episode_reward_mean,▁
eval/agents/reward/episode_reward_min,▁
eval/agents/snd,▁
eval/reward/episode_len_mean,▁
eval/reward/episode_reward_max,▁
eval/reward/episode_reward_mean,▁
eval/reward/episode_reward_min,▁
timers/evaluation_time,▁

0,1
collection/agents/estimated_snd,203.30853
collection/agents/info/agent_collisions,0
collection/agents/info/final_rew,0
collection/agents/info/pos_rew,0.01827
collection/agents/logits,0.02486
collection/agents/observation,-0.00358
collection/agents/out_loc_norm,0.0
collection/agents/reward/episode_reward_max,2.48916
collection/agents/reward/episode_reward_mean,1.03933
collection/agents/reward/episode_reward_min,0.22199


## Transfer Lerning
---

Finetune the policy to make it work with other task that it could not. 

In [None]:
CHECKPOINT_PATH = "/home/grad/doc/2027/spatel2/AD2C_testBed/saved_models/model2.pt"

# ==========================================
# 2. RUN LOGIC
# ==========================================
new_max_frames = 10000000 
desired_snd = 1.0

GlobalHydra.instance().clear()

sys.argv = [
    "run_script.py",
    f"model.desired_snd={desired_snd}",
    f"experiment.restore_file={CHECKPOINT_PATH}",
    f"experiment.max_n_frames={new_max_frames}",
    
    # --- TASK CONFIGURATION ---
    "task.agents_with_same_goal=1", 
    "experiment.save_folder=null"
]

@hydra.main(version_base=None, config_path=ABS_CONFIG_PATH, config_name=CONFIG_NAME)
def hydra_experiment(cfg: DictConfig) -> None:
    print(f"Resuming with SND: {cfg.model.desired_snd}")
    print(f"Agents sharing a goal: {cfg.task.agents_with_same_goal}")
    
    experiment = get_experiment(cfg=cfg)
    experiment.run()

if __name__ == "__main__":
    try:
        hydra_experiment()
    except SystemExit:
        print("Experiment finished successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")



Resuming with SND: 1.0
Agents sharing a goal: 1

Algorithm: ippo, Task: vmas/navigation


[34m[1mwandb[0m: Currently logged in as: [33msvarp[0m ([33msvarp-university-of-massachusetts-lowell[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



✅ SUCCESS: Extremum Seeking Controller initialized for group 'agents'.

SUCCESS: Logger initialized for HetControlMlpEscSnd on group 'agents'.
Experiment finished successfully.


Error executing job with overrides: ['model.desired_snd=1.0', 'experiment.restore_file=/home/grad/doc/2027/spatel2/AD2C_testBed/model_checkpoints/navigation_ippo_esc/ippo_navigation_hetcontrolmlpempirical__e3667b5c_25_11_21-15_00_13/checkpoints/checkpoint_12000000.pt', 'experiment.max_n_frames=15000000', 'task.agents_with_same_goal=1', 'experiment.save_folder=null']
Traceback (most recent call last):
  File "/tmp/ipykernel_2439253/1461699133.py", line 32, in hydra_experiment
    experiment = get_experiment(cfg=cfg)
  File "/tmp/ipykernel_2439253/1144324724.py", line 39, in get_experiment
    experiment = Experiment(
  File "/home/grad/doc/2027/spatel2/AD2C_testBed/AD2C/BenchMARL/benchmarl/experiment/experiment.py", line 332, in __init__
    self._load_experiment()
  File "/home/grad/doc/2027/spatel2/AD2C_testBed/AD2C/BenchMARL/benchmarl/experiment/experiment.py", line 792, in _load_experiment
    loaded_dict: OrderedDict = torch.load(self.config.restore_file)
  File "/home/grad/doc/202