# Battery Offloading Simulation Analysis

This notebook provides a template analysis of battery offloading simulation results.
It automatically loads the latest simulation results and generates comprehensive visualizations.

## Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os
from datetime import datetime

# Notebook-wide matplotlib defaults: figure size, font size, and a faint grid
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 12,
    'axes.grid': True,
    'grid.alpha': 0.3,
})

print("Setup complete!")

## Load Latest Results

In [None]:
def find_latest_results(base_dir="../results"):
    """
    Find the most recent simulation results directory.

    ``base_dir`` is searched first; when it is not an existing directory the
    function falls back to ``../src/results``. Run directories are recognised
    by a name made only of digits, underscores and hyphens, and the "latest"
    one is the directory whose name sorts last lexicographically.

    Args:
        base_dir: Directory expected to contain one sub-directory per run.

    Returns:
        tuple: (results_dir, per_task_file, summary_file)

    Raises:
        FileNotFoundError: If neither results location exists, if no run
            directory is found, or if either expected CSV file is missing
            from the latest run directory.
    """
    fallback_dir = Path("../src/results")
    results_path = Path(base_dir)
    if not results_path.is_dir():
        results_path = fallback_dir
    if not results_path.is_dir():
        # Fail with a message naming both locations instead of letting
        # iterdir() raise an error that only mentions the fallback path.
        raise FileNotFoundError(
            f"Results directory not found (tried {base_dir} and {fallback_dir})"
        )

    # Find all timestamp directories
    timestamp_dirs = [
        item
        for item in results_path.iterdir()
        if item.is_dir() and item.name.replace('_', '').replace('-', '').isdigit()
    ]

    if not timestamp_dirs:
        raise FileNotFoundError("No simulation results found in results directory")

    # Get the most recent directory (names are compared as strings)
    latest_dir = max(timestamp_dirs, key=lambda x: x.name)

    # Look for CSV files
    per_task_file = latest_dir / "per_task_results.csv"
    summary_file = latest_dir / "summary_statistics.csv"

    if not per_task_file.exists():
        raise FileNotFoundError(f"per_task_results.csv not found in {latest_dir}")
    if not summary_file.exists():
        raise FileNotFoundError(f"summary_statistics.csv not found in {latest_dir}")

    return latest_dir, per_task_file, summary_file

# Load the latest results
# Everything is read into temporaries first and only bound to the notebook-level
# names once both CSV files have loaded. Later cells decide whether to run by
# checking which of these names exist, so a failure part-way through must not
# leave results_dir / per_task_df defined without summary_df.
try:
    _latest = find_latest_results()
    print(f"Loading results from: {_latest[0]}")

    # Load the data
    _per_task = pd.read_csv(_latest[1])
    _summary = pd.read_csv(_latest[2])

except Exception as e:
    # Notebook boundary: report the problem instead of showing a traceback
    print(f"Error loading results: {e}")
    print("Please run a simulation first or check the results directory path")

else:
    results_dir, per_task_file, summary_file = _latest
    per_task_df, summary_df = _per_task, _summary

    print(f"Loaded {len(per_task_df)} task records")
    print(f"Summary: {len(summary_df)} metrics")

## Data Overview

In [None]:
# Display basic information about the data
# Each frame is guarded on its own name so that a missing summary_df cannot
# raise NameError after the per-task overview has already been shown.
if 'per_task_df' in locals():
    print("Per-task data columns:")
    print(per_task_df.columns.tolist())
    print("\nFirst few rows of per-task data:")
    display(per_task_df.head())

if 'summary_df' in locals():
    print("\nSummary statistics:")
    display(summary_df)

## 1. Latency Distribution Histogram

In [None]:
def plot_latency_distribution(per_task_df, save_path=None):
    """
    Plot histogram of task latency distribution.

    Latency is taken from the ``latency_ms`` column when present, otherwise
    from ``total_latency_s`` multiplied by 1000. When an ``execution_site``
    column exists, one histogram is overlaid per site; otherwise a single
    histogram of all tasks is drawn. Dashed vertical lines mark the mean and
    the 95th percentile of all tasks.

    Args:
        per_task_df: DataFrame of per-task results.
        save_path: Optional destination; when given, the figure is written
            there at 300 dpi before being shown.

    Returns:
        None. When neither latency column is present a warning is printed and
        no figure is created.
    """
    # Resolve the latency series (always in ms) before creating a figure, so
    # the early return below does not leave an empty figure behind.
    if 'latency_ms' in per_task_df.columns:
        latency_data = per_task_df['latency_ms']
    elif 'total_latency_s' in per_task_df.columns:
        latency_data = per_task_df['total_latency_s'] * 1000  # Convert to ms
    else:
        print("Warning: No latency column found")
        return
    unit = 'ms'

    fig, ax = plt.subplots(figsize=(12, 6))

    # Create histogram with execution site coloring
    if 'execution_site' in per_task_df.columns:
        site_column = per_task_df['execution_site']
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # Blue, Orange, Green

        for i, site in enumerate(site_column.unique()):
            # Slice the converted series, not the raw column: re-reading the
            # column by name would plot seconds on an axis labelled in ms.
            site_latency = latency_data[site_column == site]
            ax.hist(site_latency, bins=20, alpha=0.7,
                    label=f'{site} ({len(site_latency)} tasks)',
                    color=colors[i % len(colors)])
    else:
        ax.hist(latency_data, bins=20, alpha=0.7, color='#1f77b4')

    ax.set_xlabel(f'Task Latency ({unit})')
    ax.set_ylabel('Number of Tasks')
    ax.set_title('Task Latency Distribution by Execution Site')

    # Mark mean and P95 of all tasks
    mean_latency = latency_data.mean()
    p95_latency = latency_data.quantile(0.95)
    ax.axvline(mean_latency, color='red', linestyle='--', alpha=0.7, label=f'Mean: {mean_latency:.1f}{unit}')
    ax.axvline(p95_latency, color='orange', linestyle='--', alpha=0.7, label=f'P95: {p95_latency:.1f}{unit}')

    # Single legend call once every labelled artist exists
    ax.legend()

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Latency distribution plot saved to: {save_path}")

    plt.show()

# Generate the plot
if 'per_task_df' in locals():
    plot_latency_distribution(per_task_df)

## 2. Battery SoC Curve

In [None]:
def plot_soc_curve(per_task_df, save_path=None):
    """
    Draw the battery State of Charge (SoC) against task completion time.

    The curve needs the ``soc_after`` and ``completion_time_s`` columns; when
    either is missing, a placeholder message is drawn instead. If a
    ``soc_before`` column exists and there is at least one row, the first
    row's ``soc_before`` is prepended as the point at time 0. Tasks whose
    ``execution_site`` is LOCAL, EDGE or CLOUD get a coloured marker.

    Args:
        per_task_df: DataFrame of per-task results.
        save_path: Optional destination; when given, the figure is written
            there at 300 dpi before being shown.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    columns = per_task_df.columns
    has_soc_series = 'soc_after' in columns and 'completion_time_s' in columns

    if not has_soc_series:
        ax.text(0.5, 0.5, 'SoC data not available\nColumns needed: soc_after, completion_time_s',
                transform=ax.transAxes, ha='center', va='center', fontsize=14)
        ax.set_title('Battery SoC Curve (No Data Available)')
    else:
        times = per_task_df['completion_time_s']
        levels = per_task_df['soc_after']

        # Prepend the starting level so the curve begins at t = 0
        if 'soc_before' in columns and len(per_task_df) > 0:
            first_soc = per_task_df.iloc[0]['soc_before']
            times = [0, *times.tolist()]
            levels = [first_soc, *levels.tolist()]

        ax.plot(times, levels, 'b-', linewidth=2, marker='o', markersize=4)

        # Overlay one scatter series per known execution site
        if 'execution_site' in columns:
            site_colors = {'LOCAL': 'blue', 'EDGE': 'orange', 'CLOUD': 'green'}
            for site_name, marker_color in site_colors.items():
                site_rows = per_task_df[per_task_df['execution_site'] == site_name]
                if site_rows.empty:
                    continue
                ax.scatter(site_rows['completion_time_s'],
                           site_rows['soc_after'],
                           c=marker_color, s=50, alpha=0.7, label=f'{site_name} tasks')

        ax.set_xlabel('Time (seconds)')
        ax.set_ylabel('Battery SoC (%)')
        ax.set_title('Battery State of Charge Over Time')
        ax.set_ylim(0, 100)
        ax.legend()

        # Faint grid for readability
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"SoC curve plot saved to: {save_path}")

    plt.show()

# Generate the plot
if 'per_task_df' in locals():
    plot_soc_curve(per_task_df)

## 3. Energy Consumption Box Plot

In [None]:
def plot_energy_boxplot(per_task_df, save_path=None):
    """
    Plot box plot of energy consumption by execution site.

    The energy column is the first of ``energy_wh``, ``total_energy_wh`` and
    ``energy_consumed_wh`` that exists. One box is drawn per distinct
    ``execution_site`` value and each box is annotated with the site's mean.
    When the needed columns are missing or the frame has no rows, a
    placeholder message is drawn instead.

    Args:
        per_task_df: DataFrame of per-task results.
        save_path: Optional destination; when given, the figure is written
            there at 300 dpi before being shown.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Check for energy and execution site columns
    energy_col = next(
        (col for col in ('energy_wh', 'total_energy_wh', 'energy_consumed_wh')
         if col in per_task_df.columns),
        None,
    )

    if energy_col and 'execution_site' in per_task_df.columns and len(per_task_df) > 0:
        # Filter each site once; drop NaN so the box agrees with the mean
        # annotation (Series.mean() already skips NaN).
        site_column = per_task_df['execution_site']
        energy_by_site = [
            (site, per_task_df.loc[site_column == site, energy_col].dropna())
            for site in site_column.unique()
        ]

        # Create box plot. Tick labels are set explicitly afterwards instead
        # of through boxplot's `labels=` keyword, which newer Matplotlib
        # releases deprecate.
        box_plot = ax.boxplot([values.values for _, values in energy_by_site],
                              patch_artist=True)
        ax.set_xticks(list(range(1, len(energy_by_site) + 1)))
        ax.set_xticklabels([str(site) for site, _ in energy_by_site])

        # Color the boxes, cycling the palette when there are more sites than colors
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # Blue, Orange, Green
        for i, patch in enumerate(box_plot['boxes']):
            patch.set_facecolor(colors[i % len(colors)])
            patch.set_alpha(0.7)

        ax.set_xlabel('Execution Site')
        ax.set_ylabel('Energy Consumption (Wh)')
        ax.set_title('Energy Consumption Distribution by Execution Site')

        # Add statistics annotations (boxes sit at x = 1, 2, ...)
        for position, (site, values) in enumerate(energy_by_site, start=1):
            if values.empty:
                continue  # nothing to annotate; the mean would be NaN
            mean_energy = values.mean()
            ax.text(position, mean_energy, f'{mean_energy:.4f}', ha='center',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

    else:
        ax.text(0.5, 0.5, f'Energy data not available\nColumns needed: energy_wh, execution_site\nAvailable: {list(per_task_df.columns)}',
                transform=ax.transAxes, ha='center', va='center', fontsize=12)
        ax.set_title('Energy Consumption Box Plot (No Data Available)')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Energy box plot saved to: {save_path}")

    plt.show()

# Generate the plot
if 'per_task_df' in locals():
    plot_energy_boxplot(per_task_df)

## 4. Execution Site Distribution Pie Chart

In [None]:
def plot_execution_site_pie(per_task_df, save_path=None):
    """
    Draw two side-by-side pie charts of task counts.

    The left panel groups tasks by ``execution_site`` and the right panel by
    ``task_type``. A panel whose column is missing shows a placeholder
    message instead. Per-category counts are listed as text below each pie.

    Args:
        per_task_df: DataFrame of per-task results.
        save_path: Optional destination; when given, the figure is written
            there at 300 dpi before being shown.
    """
    def draw_panel(ax, column, palette, title, missing_text, missing_title):
        # Render one pie (or its placeholder) on the given axes
        if column not in per_task_df.columns:
            ax.text(0.5, 0.5, missing_text,
                    transform=ax.transAxes, ha='center', va='center')
            ax.set_title(missing_title)
            return

        counts = per_task_df[column].value_counts()
        ax.pie(counts.values,
               labels=counts.index,
               colors=palette[:len(counts)],
               autopct='%1.1f%%',
               startangle=90)
        ax.set_title(title)

        # One text line per category, stacked downward below the pie
        for row, (label, count) in enumerate(counts.items()):
            ax.text(0, -1.3 - row*0.1, f'{label}: {count} tasks',
                    transform=ax.transData, ha='center')

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Execution Site Distribution
    draw_panel(
        ax1,
        'execution_site',
        ['#1f77b4', '#ff7f0e', '#2ca02c'],  # Blue, Orange, Green
        'Task Distribution by Execution Site',
        'Execution site data\nnot available',
        'Execution Site Distribution (No Data)',
    )

    # Task Type Distribution
    draw_panel(
        ax2,
        'task_type',
        ['#d62728', '#9467bd', '#8c564b'],  # Red, Purple, Brown
        'Task Distribution by Task Type',
        'Task type data\nnot available',
        'Task Type Distribution (No Data)',
    )

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Distribution pie charts saved to: {save_path}")

    plt.show()

# Generate the plot
if 'per_task_df' in locals():
    plot_execution_site_pie(per_task_df)

## 5. Task Timeline Visualization

In [None]:
def plot_task_timeline(per_task_df, save_path=None):
    """
    Plot task execution timeline with different colors for execution sites.

    Each task is a horizontal bar from ``arrival_time_s`` to
    ``completion_time_s``, placed at the row's DataFrame index on the y-axis.
    Bars are colored by ``execution_site`` (LOCAL, EDGE, CLOUD); any other or
    missing site is drawn in gray. When either time column is missing, a
    placeholder message is drawn instead.

    Args:
        per_task_df: DataFrame of per-task results.
        save_path: Optional destination; when given, the figure is written
            there at 300 dpi before being shown.
    """
    fig, ax = plt.subplots(figsize=(14, 8))

    if 'arrival_time_s' in per_task_df.columns and 'completion_time_s' in per_task_df.columns:
        # Color mapping for execution sites
        color_map = {'LOCAL': '#1f77b4', 'EDGE': '#ff7f0e', 'CLOUD': '#2ca02c'}
        fallback_color = '#cccccc'

        if 'execution_site' in per_task_df.columns:
            bar_colors = [color_map.get(site, fallback_color)
                          for site in per_task_df['execution_site']]
        else:
            bar_colors = [fallback_color] * len(per_task_df)

        # Draw all bars in one call rather than one barh() call per row
        if len(per_task_df) > 0:
            start_times = per_task_df['arrival_time_s'].to_numpy()
            end_times = per_task_df['completion_time_s'].to_numpy()
            ax.barh(per_task_df.index.to_numpy(), end_times - start_times,
                    left=start_times, color=bar_colors, alpha=0.7, height=0.8)

        ax.set_xlabel('Time (seconds)')
        ax.set_ylabel('Task ID')
        ax.set_title('Task Execution Timeline by Site')

        # Create legend
        legend_elements = [plt.Rectangle((0,0),1,1, facecolor=color, alpha=0.7, label=site)
                          for site, color in color_map.items()]
        if fallback_color in bar_colors:
            # Gray bars exist, so give them a legend entry as well
            legend_elements.append(
                plt.Rectangle((0,0),1,1, facecolor=fallback_color, alpha=0.7, label='Other'))
        ax.legend(handles=legend_elements)

    else:
        ax.text(0.5, 0.5, 'Timeline data not available\nColumns needed: arrival_time_s, completion_time_s',
                transform=ax.transAxes, ha='center', va='center', fontsize=14)
        ax.set_title('Task Timeline (No Data Available)')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Task timeline plot saved to: {save_path}")

    plt.show()

# Generate the plot
if 'per_task_df' in locals():
    plot_task_timeline(per_task_df)

## Summary Statistics Table

In [None]:
def print_summary_statistics(per_task_df, summary_df):
    """
    Print comprehensive summary statistics.

    Sections are printed only for the columns that exist in ``per_task_df``:
    execution-site and task-type counts, latency (``latency_ms``, or
    ``total_latency_s`` converted to ms), energy (first of ``energy_wh``,
    ``total_energy_wh``, ``energy_consumed_wh``) and battery SoC
    (``soc_after``, with ``soc_before`` of the first row as the initial
    level when available, otherwise the maximum ``soc_after``).

    Args:
        per_task_df: DataFrame of per-task results. An empty frame prints the
            header and a zero task count only.
        summary_df: Accepted for interface compatibility; this function does
            not currently read it.
    """
    banner = "=" * 60
    print(banner)
    print("SIMULATION SUMMARY STATISTICS")
    print(banner)

    # Basic statistics
    total_tasks = len(per_task_df)
    print(f"Total Tasks: {total_tasks}")

    if total_tasks == 0:
        # Nothing to aggregate; the row lookups below would raise IndexError
        print("No task records to summarize.")
        print(banner)
        return

    columns = per_task_df.columns

    # Category distributions (count and share of all tasks)
    distributions = (
        ('execution_site', "\nExecution Site Distribution:"),
        ('task_type', "\nTask Type Distribution:"),
    )
    for column, heading in distributions:
        if column in columns:
            print(heading)
            for value, count in per_task_df[column].value_counts().items():
                print(f"  {value}: {count} ({count/total_tasks*100:.1f}%)")

    # Latency statistics (always reported in ms)
    if 'latency_ms' in columns:
        latency_data = per_task_df['latency_ms']
    elif 'total_latency_s' in columns:
        latency_data = per_task_df['total_latency_s'] * 1000  # Convert to ms
    else:
        latency_data = None

    if latency_data is not None:
        print("\nLatency Statistics (ms):")
        print(f"  Mean: {latency_data.mean():.2f}")
        print(f"  Median: {latency_data.median():.2f}")
        print(f"  P95: {latency_data.quantile(0.95):.2f}")
        print(f"  P99: {latency_data.quantile(0.99):.2f}")
        print(f"  Min: {latency_data.min():.2f}")
        print(f"  Max: {latency_data.max():.2f}")

    # Energy statistics
    energy_col = next(
        (col for col in ('energy_wh', 'total_energy_wh', 'energy_consumed_wh')
         if col in columns),
        None,
    )

    if energy_col:
        energy_data = per_task_df[energy_col]
        print("\nEnergy Statistics (Wh):")
        print(f"  Total: {energy_data.sum():.6f}")
        print(f"  Mean per task: {energy_data.mean():.6f}")
        print(f"  Std dev: {energy_data.std():.6f}")

    # Battery statistics
    if 'soc_after' in columns:
        if 'soc_before' in columns:
            initial_soc = per_task_df['soc_before'].iloc[0]
        else:
            # No starting level recorded: use the highest observed level
            initial_soc = per_task_df['soc_after'].max()
        final_soc = per_task_df['soc_after'].iloc[-1]
        print("\nBattery Statistics (%):")
        print(f"  Initial SoC: {initial_soc:.2f}")
        print(f"  Final SoC: {final_soc:.2f}")
        print(f"  SoC Drop: {initial_soc - final_soc:.2f}")

    print(banner)

# Print summary
if 'per_task_df' in locals() and 'summary_df' in locals():
    print_summary_statistics(per_task_df, summary_df)

## Save All Plots

This section saves all plots to a figures directory within the results folder.

In [None]:
# Create figures directory and save all plots
# Both names are required: every plot call below reads per_task_df, so checking
# results_dir alone could still end in a NameError.
if 'results_dir' in locals() and 'per_task_df' in locals():
    figures_dir = results_dir / "figures"
    figures_dir.mkdir(exist_ok=True)

    print(f"Saving all plots to: {figures_dir}")

    # Generate and save all plots
    plot_latency_distribution(per_task_df, figures_dir / "latency_distribution.png")
    plot_soc_curve(per_task_df, figures_dir / "soc_curve.png")
    plot_energy_boxplot(per_task_df, figures_dir / "energy_boxplot.png")
    plot_execution_site_pie(per_task_df, figures_dir / "distribution_pies.png")
    plot_task_timeline(per_task_df, figures_dir / "task_timeline.png")

    print("\nAll plots saved successfully!")
else:
    print("No results directory found. Please load results first.")