In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
from pathlib import Path
from datetime import datetime
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

from fieldvision_complete import (
    make_multi_drone_env,
    get_difficulty_config,
    MAPPOWrapper,
)

import fieldvision_complete
TrainingConfig = fieldvision_complete.TrainingConfig
Trainer = fieldvision_complete.Trainer

from evaluation_pipeline import (
    evaluate_all_baselines,
    get_all_policies,
    evaluate_policy,
    MetricCollector,
    MARLResultsAdapter,
    create_summary_table,
    create_latex_table,
    generate_comprehensive_figures,
    print_results_summary,
    save_comprehensive_checkpoint,
    aggregate_marl_results,
)



Using device: cuda
GPU: NVIDIA A100-SXM4-40GB


In [2]:
SCENARIOS = ["streaming_hard", "rural_field", "variable_terrain", "network_aware"]
TOTAL_TIMESTEPS = 200_000  # Reduce for faster testing (50_000 for quick test)
N_SEEDS = 3               # Number of random seeds per algorithm
N_EVAL_EPISODES = 10      # Episodes per evaluation
N_BASELINE_EPISODES = 30  # Episodes for baseline evaluation
OUTPUT_DIR = "ijcnn_results"
# ===========================================

print("=" * 60)
print("EXPERIMENT CONFIGURATION")
print("=" * 60)
print(f"Scenarios: {SCENARIOS}")
print(f"Training timesteps: {TOTAL_TIMESTEPS:,}")
print(f"Seeds per algorithm: {N_SEEDS}")
print(f"Eval episodes per seed: {N_EVAL_EPISODES}")
print(f"Baseline episodes: {N_BASELINE_EPISODES}")
print(f"Output directory: {OUTPUT_DIR}")


EXPERIMENT CONFIGURATION
Scenarios: ['streaming_hard', 'rural_field', 'variable_terrain', 'network_aware']
Training timesteps: 200,000
Seeds per algorithm: 3
Eval episodes per seed: 10
Baseline episodes: 30
Output directory: ijcnn_results


In [3]:
print("\n" + "=" * 70)
print("PHASE 1: BASELINE EVALUATION")
print("=" * 70)

all_baseline_results = {}

for scenario in SCENARIOS:
    print(f"\n{'='*60}")
    print(f"Evaluating baselines on: {scenario.upper()}")
    print("=" * 60)

    baseline_results = evaluate_all_baselines(
        difficulty=scenario,
        n_episodes=N_BASELINE_EPISODES,
        seed=42,
        verbose=True
    )

    all_baseline_results[scenario] = baseline_results

    # Print summary for this scenario
    print(f"\n--- {scenario} Baseline Summary ---")
    for name, collector in baseline_results.items():
        s = collector.summary()
        print(f"{name:20s}: Reward={s['reward_mean']:7.1f}¬±{s['reward_std']:5.1f}  "
              f"Comp={s['completion']:5.1f}%  OnTime={s['on_time_rate']:5.1f}%  "
              f"Battery={s.get('battery_used_pct', 0):5.1f}%")

print("\n‚úì Baseline evaluation complete")



PHASE 1: BASELINE EVALUATION

Evaluating baselines on: STREAMING_HARD
BASELINE EVALUATION - streaming_hard
Episodes: 30, Seed: 42

Evaluating Local-Only...
  Reward: -6693.2 ¬± 0.0
  Actions: L=100% E=0% C=0%

Evaluating Edge-Only...
  Reward: 209.3 ¬± 151.4
  Actions: L=0% E=100% C=0%

Evaluating Cloud-Only...
  Reward: 2.1 ¬± 132.5
  Actions: L=0% E=0% C=100%

Evaluating Round-Robin...
  Reward: -170.3 ¬± 93.8
  Actions: L=34% E=33% C=33%

Evaluating Random...
  Reward: 121.8 ¬± 111.9
  Actions: L=34% E=33% C=33%

Evaluating Multi-Metric...
  Reward: 305.5 ¬± 109.4
  Actions: L=47% E=53% C=0%

Evaluating Multi-Metric-BW...
  Reward: 245.0 ¬± 133.9
  Actions: L=22% E=78% C=0%

Evaluating Multi-Metric-Urgent...
  Reward: 295.7 ¬± 106.3
  Actions: L=49% E=49% C=2%

Evaluating Latency-Greedy...
  Reward: 2.1 ¬± 132.5
  Actions: L=0% E=0% C=100%

--- streaming_hard Baseline Summary ---
Local-Only          : Reward=-6693.2¬±  0.0  Comp= 83.3%  OnTime= 83.3%  Battery= 92.0%
Edge-Only      

In [4]:
print("\n" + "=" * 70)
print("PHASE 2: MARL TRAINING")
print("=" * 70)

all_marl_results = {}
all_trainers = {}  # Store for potential cross-evaluation

algorithms = ["mappo"]

for scenario in SCENARIOS:
    print(f"\n{'='*70}")
    print(f"TRAINING ON: {scenario.upper()}")
    print("=" * 70)

    env_config = get_difficulty_config(scenario)
    config = TrainingConfig(
        total_timesteps=TOTAL_TIMESTEPS,
        max_steps_per_episode=env_config.get("max_steps", 100)
    )

    def make_env_with_seed(seed=None):
        if seed is None:
            seed = np.random.randint(0, 100000)
        return make_multi_drone_env(seed=seed, **env_config)

    scenario_results = {}
    scenario_trainers = {}

    for alg in algorithms:
        print(f"\n--- {alg} ---")

        # Collect results across seeds
        all_rewards = []
        all_completions = []
        all_deadline_misses = []
        all_transfer_failures = []
        all_on_time_rates = []
        all_tasks_on_time = []
        all_tasks_late = []
        all_latencies = []
        all_battery_used = []
        all_local_pct = []
        all_edge_pct = []
        all_cloud_pct = []

        for seed in range(N_SEEDS):
            print(f"  Seed {seed + 1}/{N_SEEDS}...", end=" ", flush=True)
            torch.manual_seed(seed)
            np.random.seed(seed)

            log_dir = f"{OUTPUT_DIR}/models/{scenario}/{alg}/seed_{seed}"
            trainer = Trainer(alg, config, make_env_with_seed, log_dir)
            trainer.train()

            # Evaluate
            stats = trainer.evaluate(n_episodes=N_EVAL_EPISODES, base_seed=2000 + seed * 100)

            # Collect per-episode data
            all_rewards.extend(stats['rewards'])
            all_completions.extend(stats.get('completions', [stats['completion']]))
            all_deadline_misses.extend(stats.get('deadline_misses_list', []))
            all_transfer_failures.extend(stats.get('transfer_failures_list', []))
            all_on_time_rates.extend(stats.get('on_time_rates_list', []))
            all_tasks_on_time.extend(stats.get('tasks_on_time_list', []))
            all_tasks_late.extend(stats.get('tasks_late_list', []))
            all_latencies.extend(stats.get('ep_mean_latencies', []))
            all_battery_used.append(stats.get('battery_used_pct', 0))
            all_local_pct.append(stats['local_pct'])
            all_edge_pct.append(stats['edge_pct'])
            all_cloud_pct.append(stats['cloud_pct'])

            scenario_trainers[(alg, seed)] = trainer
            print(f"reward={stats['reward_mean']:.1f}")

        # Aggregate
        scenario_results[alg] = {
            'reward_mean': np.mean(all_rewards),
            'reward_std': np.std(all_rewards),
            'rewards': all_rewards,
            'completion': np.mean(all_completions),
            'completions': all_completions,
            'deadline_misses': np.mean(all_deadline_misses) if all_deadline_misses else 0,
            'deadline_misses_list': all_deadline_misses,
            'transfer_failures': np.mean(all_transfer_failures) if all_transfer_failures else 0,
            'transfer_failures_list': all_transfer_failures,
            'on_time_rate': np.mean(all_on_time_rates) if all_on_time_rates else 100,
            'on_time_rates_list': all_on_time_rates,
            'tasks_on_time_list': all_tasks_on_time,
            'tasks_late_list': all_tasks_late,
            'avg_latency': np.mean(all_latencies) if all_latencies else 0,
            'latency_std': np.std(all_latencies) if all_latencies else 0,
            'ep_mean_latencies': all_latencies,
            'battery_used_pct': np.mean(all_battery_used),
            'local_pct': np.mean(all_local_pct),
            'edge_pct': np.mean(all_edge_pct),
            'cloud_pct': np.mean(all_cloud_pct),
        }

        print(f"  {alg}: {scenario_results[alg]['reward_mean']:.1f} ¬± {scenario_results[alg]['reward_std']:.1f}")

    all_marl_results[scenario] = scenario_results
    all_trainers[scenario] = scenario_trainers

print("\n‚úì MARL training complete")



PHASE 2: MARL TRAINING

TRAINING ON: STREAMING_HARD

--- mappo ---
  Seed 1/3... Update 10/195 | Steps: 10,240 | Reward: 195.46 | A_Loss: 0.0181 | C_Loss: 356.2749 | Ent: 0.396
Update 20/195 | Steps: 20,480 | Reward: 294.93 | A_Loss: -0.0612 | C_Loss: 247.2916 | Ent: 0.181
Update 30/195 | Steps: 30,720 | Reward: 288.42 | A_Loss: -0.0794 | C_Loss: 327.2029 | Ent: 0.129
Update 40/195 | Steps: 40,960 | Reward: 291.55 | A_Loss: -0.0754 | C_Loss: 307.2035 | Ent: 0.090
Update 50/195 | Steps: 51,200 | Reward: 303.48 | A_Loss: -0.0960 | C_Loss: 341.1252 | Ent: 0.053
Update 60/195 | Steps: 61,440 | Reward: 322.08 | A_Loss: -0.0356 | C_Loss: 252.4222 | Ent: 0.023
Update 70/195 | Steps: 71,680 | Reward: 311.23 | A_Loss: -0.0269 | C_Loss: 335.0631 | Ent: 0.014
Update 80/195 | Steps: 81,920 | Reward: 322.60 | A_Loss: -0.0576 | C_Loss: 377.2133 | Ent: 0.006
Update 90/195 | Steps: 92,160 | Reward: 310.94 | A_Loss: -0.1086 | C_Loss: 267.9602 | Ent: 0.009
Update 100/195 | Steps: 102,400 | Reward: 292.

In [5]:
print("\n" + "=" * 70)
print("PHASE 3: GENERATING PER-SCENARIO RESULTS")
print("=" * 70)

for scenario in SCENARIOS:
    print(f"\n--- {scenario} ---")

    scenario_dir = f"{OUTPUT_DIR}/{scenario}"
    Path(scenario_dir).mkdir(parents=True, exist_ok=True)

    # Convert baseline collectors to format expected by visualization functions
    baseline_for_viz = {}
    for name, collector in all_baseline_results[scenario].items():
        s = collector.summary()
        baseline_for_viz[name] = s

    # Save comprehensive checkpoint
    save_comprehensive_checkpoint(
        scenario_dir,
        all_marl_results[scenario],
        baseline_results=all_baseline_results[scenario],
        config={
            'scenario': scenario,
            'total_timesteps': TOTAL_TIMESTEPS,
            'n_seeds': N_SEEDS,
            'n_eval_episodes': N_EVAL_EPISODES,
        }
    )

print("\n‚úì Per-scenario results saved")



PHASE 3: GENERATING PER-SCENARIO RESULTS

--- streaming_hard ---
‚úì Saved summary table to ijcnn_results/streaming_hard/summary_table.csv
‚úì Saved LaTeX table to ijcnn_results/streaming_hard/summary_table.tex
‚úì Generated figures in ijcnn_results/streaming_hard/figures/
  - reward_comparison.png
  - completion_comparison.png
  - ontime_comparison.png
  - latency_comparison.png
  - battery_comparison.png
  - action_distribution.png
  - multi_metric_comparison.png

COMPREHENSIVE RESULTS SUMMARY

Algorithm                     Reward    Comp%  OnTime%    Latency   Battery% Actions (L/E/C)
-----------------------------------------------------------------------------------------------

[BASELINES]
Local-Only               -6693.2¬±0.0    83.3%    83.3%       1.2      92.0% 100/0/0
Edge-Only                209.3¬±151.4    72.6%    72.6%      19.7       0.0% 0/100/0
Cloud-Only                 2.1¬±132.5    72.6%    72.6%      19.7       0.0% 0/0/100
Round-Robin              -170.3¬±93.8   

In [6]:
print("\n" + "=" * 70)
print("PHASE 4: CROSS-SCENARIO AGGREGATION")
print("=" * 70)

# Build aggregate summary across all scenarios
aggregate_rows = []

# Baselines
for scenario in SCENARIOS:
    for name, collector in all_baseline_results[scenario].items():
        s = collector.summary()
        aggregate_rows.append({
            'scenario': scenario,
            'algorithm': name,
            'type': 'Baseline',
            'reward_mean': s['reward_mean'],
            'reward_std': s['reward_std'],
            'completion': s['completion'],
            'on_time_rate': s['on_time_rate'],
            'battery_used_pct': s.get('battery_used_pct', 0),
            'avg_latency': s.get('avg_latency', 0),
            'local_pct': s['local_pct'],
            'edge_pct': s['edge_pct'],
            'cloud_pct': s['cloud_pct'],
        })

# MARL
for scenario in SCENARIOS:
    for alg, stats in all_marl_results[scenario].items():
        aggregate_rows.append({
            'scenario': scenario,
            'algorithm': alg,
            'type': 'MARL',
            'reward_mean': stats['reward_mean'],
            'reward_std': stats['reward_std'],
            'completion': stats['completion'],
            'on_time_rate': stats['on_time_rate'],
            'battery_used_pct': stats.get('battery_used_pct', 0),
            'avg_latency': stats.get('avg_latency', 0),
            'local_pct': stats['local_pct'],
            'edge_pct': stats['edge_pct'],
            'cloud_pct': stats['cloud_pct'],
        })

aggregate_df = pd.DataFrame(aggregate_rows)
aggregate_df.to_csv(f"{OUTPUT_DIR}/aggregate_all_scenarios.csv", index=False)
print(f"‚úì Saved {OUTPUT_DIR}/aggregate_all_scenarios.csv")

# Print cross-scenario summary
print("\n" + "=" * 70)
print("CROSS-SCENARIO SUMMARY (Mean ¬± Std across all episodes)")
print("=" * 70)

# Group by algorithm and compute mean across scenarios
algo_summary = aggregate_df.groupby(['algorithm', 'type']).agg({
    'reward_mean': ['mean', 'std'],
    'completion': 'mean',
    'on_time_rate': 'mean',
    'battery_used_pct': 'mean',
}).round(2)

print(algo_summary)



PHASE 4: CROSS-SCENARIO AGGREGATION
‚úì Saved ijcnn_results/aggregate_all_scenarios.csv

CROSS-SCENARIO SUMMARY (Mean ¬± Std across all episodes)
                             reward_mean         completion on_time_rate  \
                                    mean     std       mean         mean   
algorithm           type                                                   
Cloud-Only          Baseline     -175.82  235.97      49.56        49.56   
Edge-Only           Baseline       -5.43  262.84      49.56        49.56   
Latency-Greedy      Baseline     -175.82  235.97      49.56        49.56   
Local-Only          Baseline    -6227.72  787.55      87.29        87.29   
Multi-Metric        Baseline      174.04  167.17      73.23        73.23   
Multi-Metric-BW     Baseline       85.63  186.49      65.08        65.08   
Multi-Metric-Urgent Baseline      166.41  165.32      73.80        73.80   
Random              Baseline      -23.68  180.72      67.37        67.37   
Round-Robin      

In [26]:
# =============================================================================
# CELL 5: FIGURE GENERATION (Updated for MAPPO vs Baselines only)
# =============================================================================
print("\n" + "=" * 70)
print("PHASE 5: GENERATING PUBLICATION FIGURES")
print("=" * 70)

figures_dir = f"{OUTPUT_DIR}/figures"
Path(figures_dir).mkdir(parents=True, exist_ok=True)

# Algorithms to compare: baselines + MAPPO only
algorithms_to_plot = ['Edge-Only', 'Random', 'Multi-Metric', 'MAPPO']
colors = ['#9b59b6', '#95a5a6', '#f39c12', '#e74c3c']
display_names = ['Edge-Only', 'Random', 'Multi-Metric', 'MAPPO (Ours)']

# Prepare data
scenarios_short = [s.replace('_', '\n') for s in SCENARIOS]
x = np.arange(len(SCENARIOS))
width = 0.15

# Figure 1: Cross-scenario reward comparison (grouped bar chart)
fig, ax = plt.subplots(figsize=(14, 6))

for i, (alg, color, display) in enumerate(zip(algorithms_to_plot, colors, display_names)):
    means = []
    stds = []
    for scenario in SCENARIOS:
        # Handle case-insensitive matching for 'mappo' vs 'MAPPO'
        row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                          (aggregate_df['algorithm'].str.lower() == alg.lower())]
        if len(row) > 0:
            means.append(row['reward_mean'].values[0])
            stds.append(row['reward_std'].values[0])
        else:
            means.append(0)
            stds.append(0)

    offset = (i - len(algorithms_to_plot)/2 + 0.5) * width
    bars = ax.bar(x + offset, means, width, yerr=stds, label=display, color=color,
                  capsize=3, alpha=0.85)

ax.set_xlabel('Scenario', fontsize=12)
ax.set_ylabel('Episode Reward', fontsize=12)
ax.set_title('MAPPO vs Baseline Performance Across Agricultural Scenarios', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(scenarios_short)
ax.legend(loc='upper right', ncol=2)
ax.grid(True, alpha=0.3, axis='y')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)

plt.tight_layout()
plt.savefig(f"{figures_dir}/cross_scenario_reward.png", dpi=150, bbox_inches='tight')
plt.close()
print(f"‚úì Saved {figures_dir}/cross_scenario_reward.png")


# Figure 2: Completion rate comparison
fig, ax = plt.subplots(figsize=(14, 6))

for i, (alg, color, display) in enumerate(zip(algorithms_to_plot, colors, display_names)):
    completions = []
    for scenario in SCENARIOS:
        row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                          (aggregate_df['algorithm'].str.lower() == alg.lower())]
        if len(row) > 0:
            completions.append(row['completion'].values[0])
        else:
            completions.append(0)

    offset = (i - len(algorithms_to_plot)/2 + 0.5) * width
    ax.bar(x + offset, completions, width, label=display, color=color, alpha=0.85)

ax.set_xlabel('Scenario', fontsize=12)
ax.set_ylabel('Task Completion Rate (%)', fontsize=12)
ax.set_title('Task Completion Rate Across Scenarios', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(scenarios_short)
ax.legend(loc='lower right', ncol=2)
ax.grid(True, alpha=0.3, axis='y')
ax.set_ylim(0, 100)

plt.tight_layout()
plt.savefig(f"{figures_dir}/cross_scenario_completion.png", dpi=150, bbox_inches='tight')
plt.close()
print(f"‚úì Saved {figures_dir}/cross_scenario_completion.png")


# Figure 3: Action distribution (stacked bar) - compare key policies
fig, axes = plt.subplots(1, len(SCENARIOS), figsize=(16, 5), sharey=True)

algs_for_stack = ['Edge-Only', 'Random', 'Multi-Metric', 'MAPPO']
stack_labels = ['Edge\nOnly', 'Random', 'Multi\nMetric', 'MAPPO']

for idx, scenario in enumerate(SCENARIOS):
    ax = axes[idx]

    local_pcts = []
    edge_pcts = []
    cloud_pcts = []

    for alg in algs_for_stack:
        row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                          (aggregate_df['algorithm'].str.lower() == alg.lower())]
        if len(row) > 0:
            local_pcts.append(row['local_pct'].values[0])
            edge_pcts.append(row['edge_pct'].values[0])
            cloud_pcts.append(row['cloud_pct'].values[0])
        else:
            local_pcts.append(0)
            edge_pcts.append(0)
            cloud_pcts.append(0)

    x_stack = np.arange(len(algs_for_stack))
    ax.bar(x_stack, local_pcts, label='Local' if idx == 0 else '', color='#3498db')
    ax.bar(x_stack, edge_pcts, bottom=local_pcts, label='Edge' if idx == 0 else '', color='#2ecc71')
    ax.bar(x_stack, cloud_pcts, bottom=np.array(local_pcts) + np.array(edge_pcts),
           label='Cloud' if idx == 0 else '', color='#e74c3c')

    ax.set_title(scenario.replace('_', ' ').title(), fontsize=11, fontweight='bold')
    ax.set_xticks(x_stack)
    ax.set_xticklabels(stack_labels, fontsize=8)
    ax.set_ylim(0, 100)

    if idx == 0:
        ax.set_ylabel('Action Distribution (%)', fontsize=11)

axes[0].legend(loc='upper left')
fig.suptitle('Offloading Action Distribution by Scenario and Algorithm', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(f"{figures_dir}/action_distribution_by_scenario.png", dpi=150, bbox_inches='tight')
plt.close()
print(f"‚úì Saved {figures_dir}/action_distribution_by_scenario.png")


# Figure 4: MAPPO vs Multi-Metric (best heuristic baseline)
fig, ax = plt.subplots(figsize=(10, 6))

mappo_rewards = []
mm_rewards = []

for scenario in SCENARIOS:
    mappo_row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                             (aggregate_df['algorithm'].str.lower() == 'mappo')]
    mm_row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                          (aggregate_df['algorithm'] == 'Multi-Metric')]

    mappo_rewards.append(mappo_row['reward_mean'].values[0] if len(mappo_row) > 0 else 0)
    mm_rewards.append(mm_row['reward_mean'].values[0] if len(mm_row) > 0 else 0)

x = np.arange(len(SCENARIOS))
width = 0.35

ax.bar(x - width/2, mappo_rewards, width, label='MAPPO (Ours)', color='#e74c3c', alpha=0.85)
ax.bar(x + width/2, mm_rewards, width, label='Multi-Metric', color='#f39c12', alpha=0.85)

ax.set_xlabel('Scenario', fontsize=12)
ax.set_ylabel('Episode Reward', fontsize=12)
ax.set_title('MAPPO vs Multi-Metric Baseline', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(scenarios_short)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

# Add improvement percentages
# for i in range(len(SCENARIOS)):
#     if mm_rewards[i] != 0:
#         improvement = ((mappo_rewards[i] - mm_rewards[i]) / abs(mm_rewards[i])) * 100
#         y_pos = max(mappo_rewards[i], mm_rewards[i]) + 10
#         ax.annotate(f'+{improvement:.0f}%' if improvement > 0 else f'{improvement:.0f}%',
#                    xy=(i, y_pos), ha='center', fontsize=10, fontweight='bold',
#                    color='green' if improvement > 0 else 'red')

plt.tight_layout()
plt.savefig(f"{figures_dir}/mappo_vs_multiMetric.png", dpi=150, bbox_inches='tight')
plt.close()
print(f"‚úì Saved {figures_dir}/mappo_vs_multiMetric.png")


PHASE 5: GENERATING PUBLICATION FIGURES
‚úì Saved ijcnn_results/figures/cross_scenario_reward.png
‚úì Saved ijcnn_results/figures/cross_scenario_completion.png
‚úì Saved ijcnn_results/figures/action_distribution_by_scenario.png
‚úì Saved ijcnn_results/figures/mappo_vs_multiMetric.png


In [27]:
# =============================================================================
# CELL 6: LATEX TABLE GENERATION (Updated for MAPPO vs Baselines only)
# =============================================================================
print("\n" + "=" * 70)
print("PHASE 6: GENERATING LATEX TABLES")
print("=" * 70)

# Algorithms to include in table: baselines + MAPPO only
table_algorithms = ['Edge-Only', 'Random', 'Multi-Metric', 'MAPPO']

latex_rows = []
for scenario in SCENARIOS:
    for alg in table_algorithms:
        row = aggregate_df[(aggregate_df['scenario'] == scenario) &
                          (aggregate_df['algorithm'].str.lower() == alg.lower())]
        if len(row) > 0:
            r = row.iloc[0]
            latex_rows.append({
                'Scenario': scenario.replace('_', ' ').title(),
                'Algorithm': alg if alg != 'MAPPO' else '\\textbf{MAPPO (Ours)}',
                'Reward': f"{r['reward_mean']:.1f} $\\pm$ {r['reward_std']:.1f}",
                'Completion': f"{r['completion']:.1f}\\%",
                'On-Time': f"{r['on_time_rate']:.1f}\\%",
                'Battery': f"{r['battery_used_pct']:.1f}\\%",
                'Actions (L/E/C)': f"{r['local_pct']:.0f}/{r['edge_pct']:.0f}/{r['cloud_pct']:.0f}",
            })

latex_df = pd.DataFrame(latex_rows)
latex_df.to_csv(f"{OUTPUT_DIR}/latex_table_data.csv", index=False)

# Generate LaTeX table
latex_str = r"""
\begin{table*}[htbp]
\centering
\caption{Performance comparison of MAPPO against baseline strategies across agricultural scenarios.
Reward is mean $\pm$ std over evaluation episodes. Completion and On-Time rates indicate
percentage of tasks completed and completed before deadline, respectively.
Actions show distribution across Local/Edge/Cloud execution.}
\label{tab:cross_scenario_results}
\resizebox{\textwidth}{!}{
\begin{tabular}{llccccc}
\toprule
\textbf{Scenario} & \textbf{Algorithm} & \textbf{Reward} & \textbf{Completion} & \textbf{On-Time} & \textbf{Battery Used} & \textbf{Actions (L/E/C)} \\
\midrule
"""

current_scenario = None
for _, row in latex_df.iterrows():
    if row['Scenario'] != current_scenario:
        if current_scenario is not None:
            latex_str += r"\midrule" + "\n"
        current_scenario = row['Scenario']

    # Bold the best reward in each scenario
    latex_str += f"{row['Scenario']} & {row['Algorithm']} & {row['Reward']} & {row['Completion']} & {row['On-Time']} & {row['Battery']} & {row['Actions (L/E/C)']} \\\\\n"

latex_str += r"""
\bottomrule
\end{tabular}
}
\end{table*}
"""

with open(f"{OUTPUT_DIR}/results_table.tex", 'w') as f:
    f.write(latex_str)
print(f"‚úì Saved {OUTPUT_DIR}/results_table.tex")


# =============================================================================
# FINAL SUMMARY
# =============================================================================
print("\n" + "=" * 70)
print("EXPERIMENT COMPLETE - FINAL SUMMARY")
print("=" * 70)

# Filter to only the algorithms we care about
summary_algs = ['Local-Only', 'Edge-Only', 'Random', 'Multi-Metric', 'MAPPO']
filtered_df = aggregate_df[aggregate_df['algorithm'].str.lower().isin([a.lower() for a in summary_algs])]

print("\nüìä Overall Results (averaged across all scenarios):\n")

overall_summary = filtered_df.groupby('algorithm').agg({
    'reward_mean': 'mean',
    'completion': 'mean',
    'on_time_rate': 'mean',
}).sort_values('reward_mean', ascending=False)

print(overall_summary.round(2).to_string())

best_alg = overall_summary.index[0]
best_reward = overall_summary.loc[best_alg, 'reward_mean']
print(f"\nüèÜ Best Overall Algorithm: {best_alg} (mean reward: {best_reward:.1f})")

# Compute improvement over baselines
if 'Multi-Metric' in overall_summary.index and best_alg.lower() == 'mappo':
    mm_reward = overall_summary.loc['Multi-Metric', 'reward_mean']
    improvement = ((best_reward - mm_reward) / abs(mm_reward)) * 100 if mm_reward != 0 else 0
    print(f"üìà Improvement over Multi-Metric: {improvement:+.1f}%")

# Print file listing
print(f"\nüìÅ Output files in {OUTPUT_DIR}/:")
for root, dirs, files in os.walk(OUTPUT_DIR):
    level = root.replace(OUTPUT_DIR, '').count(os.sep)
    indent = ' ' * 2 * level
    print(f'{indent}{os.path.basename(root)}/')
    subindent = ' ' * 2 * (level + 1)
    for file in files[:5]:
        print(f'{subindent}{file}')
    if len(files) > 5:
        print(f'{subindent}... and {len(files) - 5} more files')


PHASE 6: GENERATING LATEX TABLES
‚úì Saved ijcnn_results/results_table.tex

EXPERIMENT COMPLETE - FINAL SUMMARY

üìä Overall Results (averaged across all scenarios):

              reward_mean  completion  on_time_rate
algorithm                                          
mappo              254.25       79.03         79.03
Multi-Metric       174.04       73.23         73.23
Edge-Only           -5.43       49.56         49.56
Random             -23.68       67.37         67.37
Local-Only       -6227.72       87.29         87.29

üèÜ Best Overall Algorithm: mappo (mean reward: 254.2)
üìà Improvement over Multi-Metric: +46.1%

üìÅ Output files in ijcnn_results/:
ijcnn_results/
  results_table.tex
  latex_table_data.csv
  aggregate_all_scenarios.csv
  network_aware/
    config.json
    summary_table.tex
    aggregate_metrics.csv
    summary_table.csv
    marl/
      mappo/
        episode_metrics.csv
        summary.json
    baselines/
      Cloud_Only/
        episode_metrics.csv
     

In [28]:
print("\n" + "=" * 70)
print("EXPERIMENT COMPLETE - FINAL SUMMARY")
print("=" * 70)

# Overall best performer
print("\nüìä Overall Results (averaged across all scenarios):\n")

overall_summary = aggregate_df.groupby('algorithm').agg({
    'reward_mean': 'mean',
    'completion': 'mean',
    'on_time_rate': 'mean',
}).sort_values('reward_mean', ascending=False)

print(overall_summary.round(2).to_string())

best_alg = overall_summary.index[0]
best_reward = overall_summary.loc[best_alg, 'reward_mean']
print(f"\nüèÜ Best Overall Algorithm: {best_alg} (mean reward: {best_reward:.1f})")

# Print file listing
print(f"\nüìÅ Output files in {OUTPUT_DIR}/:")
for root, dirs, files in os.walk(OUTPUT_DIR):
    level = root.replace(OUTPUT_DIR, '').count(os.sep)
    indent = ' ' * 2 * level
    print(f'{indent}{os.path.basename(root)}/')
    subindent = ' ' * 2 * (level + 1)
    for file in files[:5]:  # Limit to first 5 files per dir
        print(f'{subindent}{file}')
    if len(files) > 5:
        print(f'{subindent}... and {len(files) - 5} more files')




EXPERIMENT COMPLETE - FINAL SUMMARY

üìä Overall Results (averaged across all scenarios):

                     reward_mean  completion  on_time_rate
algorithm                                                 
mappo                     254.25       79.03         79.03
Multi-Metric              174.04       73.23         73.23
Multi-Metric-Urgent       166.41       73.80         73.80
Multi-Metric-BW            85.63       65.08         65.08
Edge-Only                  -5.43       49.56         49.56
Random                    -23.68       67.37         67.37
Cloud-Only               -175.82       49.56         49.56
Latency-Greedy           -175.82       49.56         49.56
Round-Robin              -192.28       71.43         71.43
Local-Only              -6227.72       87.29         87.29

üèÜ Best Overall Algorithm: mappo (mean reward: 254.2)

üìÅ Output files in ijcnn_results/:
ijcnn_results/
  results_table.tex
  latex_table_data.csv
  aggregate_all_scenarios.csv
  network_aware/

In [29]:
!zip -r {OUTPUT_DIR}.zip {OUTPUT_DIR}/
from google.colab import files
files.download(f'{OUTPUT_DIR}.zip')

updating: ijcnn_results/ (stored 0%)
updating: ijcnn_results/network_aware/ (stored 0%)
updating: ijcnn_results/network_aware/marl/ (stored 0%)
updating: ijcnn_results/network_aware/marl/mappo/ (stored 0%)
updating: ijcnn_results/network_aware/marl/mappo/episode_metrics.csv (deflated 70%)
updating: ijcnn_results/network_aware/marl/mappo/summary.json (deflated 54%)
updating: ijcnn_results/network_aware/config.json (deflated 18%)
updating: ijcnn_results/network_aware/baselines/ (stored 0%)
updating: ijcnn_results/network_aware/baselines/Cloud_Only/ (stored 0%)
updating: ijcnn_results/network_aware/baselines/Cloud_Only/episode_metrics.csv (deflated 72%)
updating: ijcnn_results/network_aware/baselines/Random/ (stored 0%)
updating: ijcnn_results/network_aware/baselines/Random/episode_metrics.csv (deflated 67%)
updating: ijcnn_results/network_aware/baselines/Multi_Metric_BW/ (stored 0%)
updating: ijcnn_results/network_aware/baselines/Multi_Metric_BW/episode_metrics.csv (deflated 69%)
updatin

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>