In [1]:
import json
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
# `sns.set` is only an alias of `set_theme` and may be removed; `histplot` (used below)
# already requires a seaborn release that provides `set_theme`.
sns.set_theme(style="whitegrid")

# Root directory scanned for experiment logs (default argument of `find_experiments`).
RESULTS_DIR = Path('results')
# Output directory for the figures saved by the plotting cells; created up front so
# `savefig` does not fail on a fresh checkout.
FIGS_DIR = Path('notebooks/figures')
FIGS_DIR.mkdir(parents=True, exist_ok=True)

def find_experiments(results_dir: Path = RESULTS_DIR):
    """Discover experiment directories laid out as ``<results_dir>/<model>/<experiment>/``.

    Args:
        results_dir: Root directory to scan. Defaults to ``RESULTS_DIR``.

    Returns:
        A list of ``(model_name, exp_id, exp_dir)`` tuples, one per second-level
        subdirectory, sorted by model directory and then experiment directory.
        The list is empty when ``results_dir`` is missing or is not a directory.
    """
    # is_dir() is False both for a missing path and for a regular file; with a plain
    # exists() check a regular file would slip through and make iterdir() raise.
    if not results_dir.is_dir():
        return []

    experiments = []
    # iterdir() yields entries in arbitrary filesystem order; sorting keeps the
    # concatenation order (and therefore plot/table order) reproducible.
    for model_dir in sorted(results_dir.iterdir()):
        if not model_dir.is_dir():
            continue
        for exp_dir in sorted(model_dir.iterdir()):
            if exp_dir.is_dir():
                experiments.append((model_dir.name, exp_dir.name, exp_dir))
    return experiments

def load_logs(exp_dir: Path):
    """Load the log CSV of a single experiment directory.

    Args:
        exp_dir: Directory searched (non-recursively) for files matching
            ``exp_*_logs.csv``.

    Returns:
        A DataFrame parsed from the lexicographically first matching file, or
        ``None`` when no file matches.
    """
    # glob() order depends on the filesystem; sort so the same file is chosen on
    # every run when more than one log matches the pattern.
    matches = sorted(exp_dir.glob('exp_*_logs.csv'))
    if not matches:
        return None
    return pd.read_csv(matches[0])

In [2]:
# Aggregate action-level data across runs per model
# Columns every log must provide; 'model' and 'experiment_id' are added below from
# the directory names returned by find_experiments().
REQUIRED_LOG_COLUMNS = ['episode', 'step', 'action', 'reward']
ACTION_COLUMNS = ['model', 'experiment_id'] + REQUIRED_LOG_COLUMNS

experiments = find_experiments()
action_rows = []
for model_name, exp_id, exp_dir in experiments:
    logs = load_logs(exp_dir)
    if logs is None:
        continue
    # Validate every column selected below, not just 'action'; otherwise a log that
    # lacks e.g. 'episode' aborts the whole aggregation with a KeyError.
    missing = [col for col in REQUIRED_LOG_COLUMNS if col not in logs.columns]
    if missing:
        print(f'Skipping {model_name}/{exp_id}: log is missing columns {missing}')
        continue
    action_rows.append(
        logs[REQUIRED_LOG_COLUMNS]
        .assign(model=model_name, experiment_id=exp_id)[ACTION_COLUMNS]
    )

if action_rows:
    actions_df = pd.concat(action_rows, ignore_index=True)
else:
    # Keep the schema even without data so the cells below can run unchanged.
    actions_df = pd.DataFrame(columns=ACTION_COLUMNS)

# Basic summaries
actions_df['action'] = actions_df['action'].astype(float)
summary_stats = actions_df.groupby('model')['action'].agg(['count','mean','std','median'])
summary_stats = summary_stats.reset_index()
summary_stats

Unnamed: 0,model,count,mean,std,median


### Action distribution plots
Below we plot histograms of action values per model to show how frequently models choose charging, discharging, or idling actions.

In [3]:
# One histogram panel per model, stacked vertically.
if not actions_df.empty:
    models = actions_df['model'].unique()
    # Create the figure only when there is something to draw (creating it before the
    # check leaves an empty figure in the output) and grow its height with the number
    # of panels so they are not squeezed into a fixed 10x6 canvas.
    fig, axes = plt.subplots(
        len(models), 1,
        figsize=(10, max(6, 3 * len(models))),
        squeeze=False,  # always a 2-D array of axes, even for a single model
    )
    for ax, m in zip(axes[:, 0], models):
        sns.histplot(actions_df.loc[actions_df['model'] == m, 'action'], bins=50, kde=False, ax=ax)
        ax.set_title(f'Action distribution: {m}')
        ax.set_xlabel('Action (kW)')
        ax.set_ylabel('Count')
    fig.tight_layout()
    fig.savefig(FIGS_DIR / 'action_distributions.png')
    plt.show()
else:
    print('No action logs found under results/ to plot.')

No action logs found under results/ to plot.


<Figure size 1000x600 with 0 Axes>

## Behavior around adverse events (exploratory)
We define *adverse events* as steps with reward below a threshold (e.g., the bottom 5% of per-step rewards across all logs). For each adverse event we look at the previous and subsequent actions and classify the policy's response as:
- **Continue**: action sign remains the same and magnitude remains > small threshold
- **Kill**: subsequent action magnitude is near zero (agent effectively stops)
- **Restart**: subsequent action reverses sign (agent switches from charging to discharging or vice versa)

This is an operational classification for descriptive analysis only; thresholds are explicitly chosen below and can be adjusted.

In [4]:
# Identify adverse events and classify the policy's response to each one
ADVERSE_QUANTILE = 0.05    # rewards at or below this quantile (pooled over all logs) are adverse
ZERO_ACTION_THRESH = 0.1   # |action| at or below this value counts as 'stopped'
RUN_KEYS = ['model', 'experiment_id', 'episode']

if not actions_df.empty:
    reward_threshold = actions_df['reward'].quantile(ADVERSE_QUANTILE)
    adverse = actions_df[actions_df['reward'] <= reward_threshold].copy()

    # One action per (run, step); if a log repeats a step, the first occurrence wins.
    step_actions = actions_df.drop_duplicates(subset=RUN_KEYS + ['step'])[RUN_KEYS + ['step', 'action']]

    def neighbour_action(offset):
        """Return the action at ``step + offset`` within the same model/experiment/episode.

        The result is a float array aligned with the rows of ``adverse``; entries are
        NaN where no such step was logged. A single merge replaces re-filtering all of
        ``actions_df`` once per adverse row.
        """
        shifted = adverse[RUN_KEYS + ['step']].assign(step=adverse['step'] + offset)
        # Left merge keeps exactly one row per adverse event, in the original order,
        # because the keys of step_actions are unique (enforced by `validate`).
        merged = shifted.merge(step_actions, on=RUN_KEYS + ['step'], how='left', validate='many_to_one')
        return merged['action'].to_numpy(dtype=float)

    prev_action = neighbour_action(-1)
    next_action = neighbour_action(+1)

    next_known = ~np.isnan(next_action)
    comparable = next_known & ~np.isnan(prev_action)
    # Kill: the following action is near zero (only the next step is needed).
    is_kill = next_known & (np.abs(next_action) <= ZERO_ACTION_THRESH)
    # Restart: sign differs between the previous and the following action.
    # NOTE: np.sign(0) == 0, so a previous action of exactly 0 followed by a
    # non-zero action is also counted as 'restart'.
    is_restart = comparable & (np.sign(prev_action) != np.sign(next_action))

    # Conditions are evaluated in order. Events at an episode boundary, where the
    # response cannot be observed or compared, are labelled 'undetermined' instead of
    # being silently counted as 'continue'.
    adverse['response'] = np.select(
        [is_kill, ~comparable, is_restart],
        ['kill', 'undetermined', 'restart'],
        default='continue',
    )
    behavior_counts = adverse.groupby(['model','response']).size().unstack(fill_value=0)
    # A bare expression inside an `if` block is not rendered by Jupyter; display it
    # explicitly (`display` is provided by the IPython kernel).
    display(behavior_counts)
else:
    # Always define the name so later cells never read a stale or missing table.
    behavior_counts = pd.DataFrame()
    print('No logs available to analyze adverse-event behavior.')

No logs available to analyze adverse-event behavior.


In [5]:
# Plot behavior counts per model (stacked bar) and show the per-model breakdown
# The globals() check keeps this cell runnable when the classification cell did not
# create `behavior_counts`.
has_behavior_data = 'behavior_counts' in globals() and not behavior_counts.empty

if has_behavior_data:
    ax = behavior_counts.plot(kind='bar', stacked=True, figsize=(8,4))
    ax.set_title('Policy responses to adverse steps (exploratory classification)')
    ax.set_ylabel('Count')
    ax.set_xlabel('Model')
    ax.figure.tight_layout()
    ax.figure.savefig(FIGS_DIR / 'behavior_response_counts.png')
    plt.show()

    # Small table: share of each response per model. Values are row-normalised
    # fractions in [0, 1], rounded to three decimals.
    percent = behavior_counts.divide(behavior_counts.sum(axis=1), axis=0).round(3)
    # A bare expression inside an `if` block is not rendered by Jupyter; display it
    # explicitly (`display` is provided by the IPython kernel).
    display(percent)
else:
    print('No behavior data to display.')

No behavior data to display.


---
### Notes and limitations
- The continue/kill/restart classification is operational and exploratory: it uses a quantile-based threshold on per-step rewards and a simple sign/magnitude heuristic to classify responses.
- These exploratory analyses are intended to generate reproducible observations that can guide deeper investigation. They are not final claims about model safety or robustness.
- All log artifacts used in this notebook are read from `results/` and are preserved for audit; the figures generated here are saved to `notebooks/figures/`.