Considering Adding:
- Confidence intervals for performance metrics (Most papers only really seem to go this far)
- Statistical significance tests between different approaches
- Variance analysis across multiple runs




### **This experiment compares the performance of an adaptive reward function against state-of-the-art reward functions in environments whose environmental variables change.**

-> Is there a statistically significant improvement in performance over time in this varying environment?


In [1]:
import gym
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import os
from pathlib import Path

### **Performance Experiment**

In [None]:
# State of the art agents 

class PPOAgent:
    """Placeholder for the PPO baseline agent used in the comparison."""

    def __init__(self, env, device):
        """Accept the environment and device; nothing is stored by this stub."""
        # PPO implementation...

class A2CAgent:
    """Placeholder for the A2C baseline agent used in the comparison."""

    def __init__(self, env, device):
        """Accept the environment and device; nothing is stored by this stub."""
        # A2C implementation...

In [None]:
# Training

def _makeEpisodeRecorder(episode_rewards, episode_balance_times):
    """Build an ``onEpisodeEnd`` callback that appends to the given lists."""

    def onEpisodeEnd(env, updateSystem, episode, reward, steps):
        # Record the outcome of the finished episode. Returns None, exactly
        # like the placeholder callback this replaces.
        episode_rewards.append(reward)
        episode_balance_times.append(steps)

    return onEpisodeEnd


def runPerformanceComparisonTest(episodes=1000):
    """Train each compared model and collect its per-episode metrics.

    Every model gets its own freshly constructed CartPole environment so
    that one model's training cannot leak state into the next model's run.

    Args:
        episodes: Number of training episodes passed to ``trainDQLearning``
            for each model.

    Returns:
        dict mapping model name to a dict with the keys ``'rewards'``,
        ``'balance_times'`` and ``'reward_changes'``. ``'reward_changes'``
        is a list for ``'adaptive_reward'`` and ``None`` for the baselines.
    """
    print("Starting Performance Comparison Test...")

    def makeEnv():
        # One independent wrapped environment per call
        return CustomCartPoleEnv(gym.make('CartPole-v1'))

    # Build every environment/agent up front so construction errors surface
    # before any (long) training run starts.
    adaptive_env, ppo_env, a2c_env = makeEnv(), makeEnv(), makeEnv()
    models = {
        'adaptive_reward': {
            'env': adaptive_env,
            'agent': DQLearningAgent(adaptive_env, 4, 2, device),
            'update_system': RewardUpdateSystem(apiKey, modelName),
        },
        'ppo': {
            'env': ppo_env,
            'agent': PPOAgent(ppo_env, device),
            'update_system': None,
        },
        'a2c': {
            'env': a2c_env,
            'agent': A2CAgent(a2c_env, device),
            'update_system': None,
        },
    }

    results = {}

    for model_name, model_info in models.items():
        print(f"\nTesting model: {model_name}")

        model_env = model_info['env']
        model_env.reset()

        # Training metrics, filled in by the episode-end callback
        episode_rewards = []
        episode_balance_times = []
        # NOTE(review): nothing here records when the reward function
        # changes, so this list stays empty; wire it to the update system
        # once its API is confirmed.
        reward_change_episodes = [] if model_name == 'adaptive_reward' else None

        # NOTE(review): the PPO/A2C baselines are also driven through
        # trainDQLearning - confirm that trainer supports those agents.
        _agent, _env, rewards = trainDQLearning(
            agent=model_info['agent'],
            env=model_env,
            numEpisodes=episodes,
            updateSystem=model_info['update_system'],
            onEpisodeEnd=_makeEpisodeRecorder(
                episode_rewards, episode_balance_times
            ),
        )

        # If the trainer never invoked the callback, fall back to the reward
        # sequence it returned (presumably per-episode rewards - verify).
        if not episode_rewards and rewards is not None:
            episode_rewards.extend(rewards)

        results[model_name] = {
            'rewards': episode_rewards,
            'balance_times': episode_balance_times,
            'reward_changes': reward_change_episodes,
        }

    return results


In [None]:
# Kick off the comparison with a 1000-episode budget
results = runPerformanceComparisonTest(episodes=1000)

In [None]:
# Visualisation and Metrics

def visualizePerformanceComparison(results, window=50):
    """Plot smoothed reward and balance-time curves for every model.

    The top panel shows the rolling-mean reward per episode, the bottom
    panel the rolling-mean balance time. For the ``'adaptive_reward'``
    model, dashed vertical lines mark the episodes listed under
    ``'reward_changes'``. The figure is handed to ``savePlot`` and then
    closed.

    Args:
        results: dict mapping model name to a dict with the keys
            ``'rewards'``, ``'balance_times'`` and ``'reward_changes'``.
        window: Size of the rolling-mean window, in episodes.
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

    try:
        for model_name, model_results in results.items():
            # dtype=float keeps an empty list numeric (it would otherwise
            # become an object-dtype Series).
            rewards = pd.Series(model_results['rewards'], dtype=float)
            ax1.plot(rewards.rolling(window).mean(),
                     label=f'{model_name}', linewidth=2)

            balance_times = pd.Series(model_results['balance_times'],
                                      dtype=float)
            ax2.plot(balance_times.rolling(window).mean(),
                     label=f'{model_name}', linewidth=2)

            # Add reward function change markers for the adaptive model.
            # Explicit None/length checks avoid ambiguous truthiness if an
            # array is passed instead of a list.
            reward_changes = model_results['reward_changes']
            if (model_name == 'adaptive_reward'
                    and reward_changes is not None
                    and len(reward_changes) > 0):
                for ep in reward_changes:
                    for ax in (ax1, ax2):
                        ax.axvline(x=ep, color='g', linestyle='--', alpha=0.3)

        ax1.set_title('Average Reward Over Time')
        ax1.set_xlabel('Episode')
        ax1.set_ylabel('Reward')
        ax1.legend()
        ax1.grid(True)

        ax2.set_title('Average Balance Time Over Time')
        ax2.set_xlabel('Episode')
        ax2.set_ylabel('Balance Time')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        savePlot(fig, "performanceComparison", "PerformanceExperiment")
    finally:
        # Close this specific figure (not just the "current" one), even if
        # plotting or saving fails, so figures do not pile up in memory.
        plt.close(fig)

# Generate and Display Metrics
def _tailMean(values, window):
    """Return the mean of the last ``window`` entries, or NaN if there are none."""
    tail = values[-window:]
    if len(tail) == 0:
        # np.mean([]) would also give NaN, but with a RuntimeWarning
        return np.nan
    return np.mean(tail)


def calculatePerformanceMetrics(results, window=100):
    """Summarise each model's results as one row of a DataFrame.

    Args:
        results: dict mapping model name to a dict with at least the keys
            ``'rewards'`` and ``'balance_times'``.
        window: Number of final episodes averaged for the ``final_avg_*``
            columns. Must be at least 1.

    Returns:
        pandas.DataFrame indexed by model name with the columns
        ``final_avg_reward``, ``final_avg_balance``, ``learning_speed`` and
        ``stability``. The averages are NaN for a model with no recorded
        episodes.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        # A window of 0 would make values[-0:] select the whole sequence
        raise ValueError("window must be at least 1")

    metrics = {}
    for model_name, model_results in results.items():
        rewards = model_results['rewards']
        metrics[model_name] = {
            'final_avg_reward': _tailMean(rewards, window),
            'final_avg_balance': _tailMean(
                model_results['balance_times'], window
            ),
            'learning_speed': calculateLearningSpeed(rewards),
            'stability': calculateStability(rewards),
        }
    # Transpose so each model is a row and each metric a column
    return pd.DataFrame(metrics).T

# Summarise every model's results as a table and render it in the notebook
metrics = calculatePerformanceMetrics(results=results)
display(metrics)