# Random Ensemble Orchestration - With get STATE


Setting up the enviroment and simulation

In [8]:
from model_rl import ServiceConfig, ProcessingService, ServiceType, InferenceService, EnsembleService
import logging
import os
import time
import pandas as pd
import random
import copy
from collections import Counter
from rohe.common import rohe_utils as utils
from IPython.display import clear_output
from config_manager import ConfigManager
import yaml

# Configure logging
#logging.basicConfig(format="%(asctime)s:%(levelname)s -- %(message)s", level=logging.INFO)
#logging.getLogger().setLevel(logging.CRITICAL)

# Paths and configurations
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
MODEL_DATA_PATH = os.path.join(parent_dir, "profile/processed")
PROFILE_PATH = os.path.join(parent_dir, "profile/model_profile/model_profile.yaml")
CONFIG_PATH = os.path.join(current_dir, "sim_config.yaml")
FILE_DATA_PATH = os.path.join(current_dir, "file_label.csv")
THROUGHPUT_REQUIREMENT = 15

# RELOAD INITIAL YAML
initial_yaml_file_path = "initial_sim_config.yaml"  # Ensure this path matches your desired location
# Load contents from the initial YAML file
with open(initial_yaml_file_path, "r") as initial_file:
    initial_yaml_content = yaml.safe_load(initial_file)

# Write the loaded contents into the target YAML file
with open(CONFIG_PATH, "w") as target_file:
    yaml.dump(initial_yaml_content, target_file, default_flow_style=False)

print(f"Contents of {initial_yaml_file_path} have been dumped into {CONFIG_PATH}.")

# Load necessary data
profile_data = utils.load_config(PROFILE_PATH)
data_file_label = pd.read_csv(FILE_DATA_PATH).groupby("label")
labels = list(data_file_label.groups.keys())
model_profile_data = {}

# Input data example
init_data = {
    "input": {
        "file_name": "n01560419_3101",
        "image_height": 224,
        "image_width": 224,
    }
}

weights = {
    "accuracy": 1.0,
    "confidence": 0.0,
    "latency": 0.0,
    "energy": 0.0,
    "explainability": 0.0
}


# Global variables
sim_config = None
distribution_keys = None
distribution_weights = None
processing_service = None
ensemble_service = None
total_energy_consumption = 0


# Function to configure the pipeline
def config_pipeline():
    global sim_config, distribution_keys, distribution_weights, processing_service, ensemble_service

    # Load simulation config
    sim_config = utils.load_config(CONFIG_PATH)

    # Input distribution
    distribution = sim_config["distribution"]
    distribution_keys = list(distribution.keys())
    distribution_weights = list(distribution.values())

    # Create processing service
    if "processing" in sim_config:
        processing_config = sim_config["processing"]
        processing_config["service_type"] = ServiceType.PROCESSING.value
        processing_service_config = ServiceConfig.model_validate(processing_config)
        processing_service = ProcessingService(processing_service_config)

    # Create ensemble service
    if "ensemble" in sim_config:
        ensemble_config = sim_config["ensemble"]
        ensemble_config["service_type"] = ServiceType.ENSEMBLE.value
        ensemble_service_config = ServiceConfig.model_validate(ensemble_config)
        ensemble_service = EnsembleService(ensemble_service_config)
        #ensemble_service.load_models_from_yaml(sim_config["throughput_requirement"], PROFILE_PATH)




    # Add inference services to ensemble
    if "inference" in sim_config:
        inference_configs = sim_config["inference"]
        for model_name, model_config in inference_configs.items():
            model_config["throughput"] = profile_data[model_name]["throughput"]
            model_config["energy"] = profile_data[model_name]["energy"]
            model_config["response_time"] = profile_data[model_name]["response_time"]
            model_config["service_type"] = ServiceType.INFERENCE.value
            model_config["data_path"] = str(MODEL_DATA_PATH)
            model_config["throughput_requirement"] = int(sim_config["throughput_requirement"])
            inference_service_config = ServiceConfig.model_validate(model_config)
            inference_service = InferenceService(inference_service_config)
            ensemble_service.add_model(inference_service)

# Initialize the pipeline
config_pipeline()

# Extract the state for RL
# Extract state for RL
def get_state(data):
    # Ensemble-level metrics
    energy_report = ensemble_service.energy_estimate()
    ensemble_state = {
        "total_energy_consumption": total_energy_consumption + energy_report["ensemble"],
        "ensemble_size": len(ensemble_service.ensemble),
    }

    # Model-level metrics
    model_states = []
    for model_name, model_data in model_profile_data.items():
        # Directly compute metrics from data_frame
        if "data_frame" in model_data:
            recent_df = model_data["data_frame"].tail(10000)  # Use recent 10,000 entries
            accuracy = recent_df["accuracy"].mean()
            confidence = recent_df["confidence"].mean()
            avg_response_time = recent_df["response_time"].mean()
            max_response_time = recent_df["response_time"].max()
            contribution = recent_df["contribution"].mean()

            model_states.append({
                "name": model_name,
                "accuracy": accuracy,
                "confidence": confidence,
                "avg_response_time": avg_response_time,
                "max_response_time": max_response_time,
                "contribution": contribution,
            })

    # Input state
    input_state = {
        "current_label": data["label"],
        "file_name": data["input"]["file_name"],
    }

    # Combine state
    state = {
        "ensemble": ensemble_state,
        "models": model_states,
        "input": input_state,
    }
    return state


# Function to simulate applying RL agent actions
def apply_action(action):
    manager = ConfigManager(PROFILE_PATH, CONFIG_PATH)

    if action == "keep_ensemble":
        logging.info("Action: Keeping the ensemble")
    
    elif action == "add_model":
        manager.add_best_model(weights)
        logging.info("Action: Adding a model")
        # Logic for adding a new model (Placeholder)
    elif action == "replace_model":
        logging.info("Action: Replacing a model")
        manager.remove_worst_model(weights)
        manager.add_best_model(weights)
        # Logic for replacing a model (Placeholder)
    elif action == "remove_model":
        manager.remove_worst_model(weights)

    elif action == "add_random_model":
        manager.add_random_model()
    elif action == "remove_random_model":
        manager.remove_random_model()
    elif action == "replace_random_model":
        manager.remove_random_model()
        manager.add_random_model()


    else:
        logging.warning("Unknown action")






Contents of initial_sim_config.yaml have been dumped into c:\Documents\Aalto Semester 1\Research\ROHE_orchestrator\ROHE_orchestrator\source\rl_simulation\sim_config.yaml.


## Evaluating different simulations


In [7]:
def run_simulation_loop(duration=300, save_interval=500, action_policy="keep"):
    """
    Runs the simulation loop for a specified duration and saves results periodically.

    Args:
        duration (int): Duration of the simulation loop in seconds. Default is 300 seconds.
        save_interval (int): Number of inferences after which to save results. Default is 500 inferences.

    Returns:
        None
    """
    
    CONFIG_PATH = os.path.join(current_dir, "sim_config.yaml")
    initial_yaml_file_path = "initial_sim_config.yaml"  # Ensure this path matches your desired location
    # Load contents from the initial YAML file
    with open(initial_yaml_file_path, "r") as initial_file:
        initial_yaml_content = yaml.safe_load(initial_file)

    # Write the loaded contents into the target YAML file
    with open(CONFIG_PATH, "w") as target_file:
        yaml.dump(initial_yaml_content, target_file, default_flow_style=False)

    inference_count = 0   # Use the global inference_count variable
    start_time = time.time()

    

    while time.time() - start_time < duration:
        
        config_pipeline()
        
        model_count = 0
        # Prepare input data
        data = copy.deepcopy(init_data)
        selected_key = random.choices(distribution_keys, weights=distribution_weights, k=1)[0]
        if selected_key in labels:
            df_file = data_file_label.get_group(selected_key)
            file_name = random.choice(df_file["file_name"].values)
        data["input"]["file_name"] = file_name
        data["label"] = selected_key

        # Execute services
        data = processing_service.execute(data)
        data = ensemble_service.execute(data)

        # Update model profiles
        for model_name, inferences in data["ml_inference"].items():
            model_count +=1 
            response_time = data["response_time"]["inference"].get(model_name, 0)
            i_label = data["label"]
            i_accuracy = 1 if i_label in inferences else 0
            i_confidence = inferences.get(i_label, 0)
            model_contribution = data["contribution"].get(model_name, 0)

            # Save to profile data
            data_dict = {
                "label": [i_label],
                "accuracy": [i_accuracy],
                "confidence": [i_confidence],
                "response_time": [response_time],
                "contribution": [model_contribution],
            }
            result_df = pd.DataFrame(data_dict)
            if model_name not in model_profile_data:
                model_profile_data[model_name] = {"data_frame": result_df}
            else:
                model_profile_data[model_name]["data_frame"] = pd.concat(
                    [model_profile_data[model_name]["data_frame"], result_df], ignore_index=True
                )
            #logging.info(model_name)
            #logging.info(result_df)
   
        # Get RL state and decide an action
        state = get_state(data)
        print(data)
        
        possible_actions_random = ["keep_ensemble", "add_random_model", "replace_random_model", "remove_random_model"]
        possible_actions_scoring = ["keep_ensemble", "add_model", "replace_model", "remove_model"]
        
        max_ensemble_size = 10
        min_ensemble_size = 4
        # Randomly select an action
        print (model_count)
        if (action_policy == "keep"):
            action = "keep_ensemble"
        elif (action_policy == "random_action"): 
            action = random.choice(possible_actions_random)
            # this limits the size of the ensemble between 3 and 10
            if (model_count >= max_ensemble_size and action == "add_random_model"): action = "keep_ensemble"
            if (model_count <= min_ensemble_size and action == "remove_random_model"): action = "keep_ensemble"
        elif (action_policy == "random_action_scoring"): 
            action = random.choice(possible_actions_scoring)
            if (model_count >= max_ensemble_size and action == "add_model"): action = "keep_ensemble"
            if (model_count <= min_ensemble_size and action == "remove_model"): action = "keep_ensemble"
        else:
            action == "keep"

        # Apply the chosen action
        apply_action(action)

        # Increment inference count
        inference_count += 1

        # Periodically save results
        if inference_count % save_interval == 0:
            logging.info(f"Inference Count: {inference_count}")
            for model_name, model_data in model_profile_data.items():
                model_data["data_frame"].tail(10000).to_csv(f"results/{action_policy}_inference/{model_name}_inference.csv")
                print("saving_data")
        
        logging.info("------------------------------------------------\n\n")

        

        

    logging.info("Simulation complete!")


# Post Simulation Analysis

## Keep original ensemble

In [None]:
run_simulation_loop(duration = 100,action_policy="keep")

In [39]:
import pandas as pd

# Define the path to the CSV file
csv_file_path = "results/keep_inference/ensemble_inference.csv"

# Read the CSV file
print("Action policy keep \n")
df = pd.read_csv(csv_file_path)
print("number of inferences: ", df.size)

# Calculate average accuracy for all labels
average_accuracy_all = df["accuracy"].mean()

# Calculate average accuracy for a specific label
average_accuracy_per_label = df.groupby("label")["accuracy"].mean()

# Print the results

print(f"Average accuracy across all labels: {average_accuracy_all:.4f}")
print(f"Average accuracy for all labels: ") 
print(average_accuracy_per_label)

Action policy keep 

number of inferences:  60000
Average accuracy across all labels: 0.8602
Average accuracy for all labels: 
label
n01440764    0.913022
n01443537    0.928138
n01484850    0.806322
n01491361    0.773036
n01494475    0.884006
Name: accuracy, dtype: float64


## Random actions on ensemble

In [None]:
run_simulation_loop(duration = 100,save_interval=50,action_policy="random_action")

In [56]:
import pandas as pd

# Define the path to the CSV file
csv_file_path = "results/random_action_inference/ensemble_inference.csv"

print("action policy: random action - with Bounds\n")
# Read the CSV file
df = pd.read_csv(csv_file_path)
print("number of inferences: ", df.size)

# Calculate average accuracy for all labels
average_accuracy_all = df["accuracy"].mean()

# Calculate average accuracy for a specific label
average_accuracy_per_label = df.groupby("label")["accuracy"].mean()

# Print the results
print(f"Average accuracy across all labels: {average_accuracy_all:.4f}")
print(f"Average accuracy for all labels: ") 
print(average_accuracy_per_label)

action policy: random action - with Bounds

number of inferences:  34746
Average accuracy across all labels: 0.8698
Average accuracy for all labels: 
label
n01440764    0.922541
n01443537    0.936300
n01484850    0.816045
n01491361    0.785965
n01494475    0.892157
Name: accuracy, dtype: float64


## Scoring actions on ensemble

In [54]:
run_simulation_loop(duration = 100, save_interval=50, action_policy="random_action_scoring")

{'input': {'file_name': 'n01494475_14819', 'image_height': 224, 'image_width': 224}, 'label': 'n01494475', 'response_time': {'processing': {'processing': 0.003301866276014817}, 'inference': {'InceptionResNetV2': 2.089522513513454, 'MobileNetV2': 0.03559614742310411, 'ResNet50V2': 0.11102995108792289, 'ensemble': 2.0979326082767296}}, 'ml_inference': {'InceptionResNetV2': {'n01494475': 0.8732707500457764}, 'MobileNetV2': {'n01494475': 0.8027446866035461}, 'ResNet50V2': {'n01494475': 0.801483690738678}, 'ensemble': {'n01494475': 0.8258330424626669}}, 'explainability': {'InceptionResNetV2': 1, 'MobileNetV2': 0, 'ResNet50V2': 0, 'ensemble': 1}, 'contribution': {'InceptionResNetV2': 0.8732707500457764, 'MobileNetV2': 0.8027446866035461, 'ResNet50V2': 0.801483690738678, 'ensemble': 0.8732707500457764}}
4
Worst model: ResNet50V2 with score: 0.7573739295908658
Model 'ResNet50V2' successfully removed from the target YAML.
Best model: EfficientNetV2S with score: 0.953925925925926
{'accuracy_n014

In [57]:
import pandas as pd

# Define the path to the CSV file
csv_file_path = "results/random_action_scoring_inference/ensemble_inference.csv"

print("action policy: random action with scoring - with bounds\n")
# Read the CSV file
df = pd.read_csv(csv_file_path)
print("number of inferences: ", df.size)

# Calculate average accuracy for all labels
average_accuracy_all = df["accuracy"].mean()

# Calculate average accuracy for a specific label
average_accuracy_per_label = df.groupby("label")["accuracy"].mean()

# Print the results
print(f"Average accuracy across all labels: {average_accuracy_all:.4f}")
print(f"Average accuracy for all labels: ") 
print(average_accuracy_per_label)

action policy: random action with scoring - with bounds

number of inferences:  37788
Average accuracy across all labels: 0.8763
Average accuracy for all labels: 
label
n01440764    0.927490
n01443537    0.939683
n01484850    0.824981
n01491361    0.791227
n01494475    0.900412
Name: accuracy, dtype: float64


## RL test

Moved to the rl_test notebook. This part is deprecated and kept for backup purposes


In [11]:
### DEFINE GET STATE RL

# IMAGE DATA +
# ENSEMBLE MDOELS DATA ( CAPPED AT 10)

# FLATTEN

"""Input ImageA: A labeled image (n01443537).
Inference:
ResNet50V2 has the highest confidence (0.9994).
The ensemble confidence is slightly lower at 0.9626.
Response Times:
MobileNetV2 is the fastest model (0.0295s).
InceptionResNetV2 is the slowest (2.0878s).
The ensemble's inference time reflects the slowest model (approximately 2.1 seconds).
Explainability:
Only InceptionResNetV2 and the ensemble have explainability enabled.
Contributions:
ResNet50V2 dominates with the highest contribution (0.9994)."""

import numpy as np

def get_state(data):
    # Ensemble-level metrics
    energy_report = ensemble_service.energy_estimate()
    ensemble_state = {
        "total_energy_consumption": total_energy_consumption + energy_report["ensemble"],
        "ensemble_size": len(ensemble_service.ensemble),
    }

    # Model-level metrics
    model_states = []
    for model_name, model_data in model_profile_data.items():
        # Directly compute metrics from data_frame
        if "data_frame" in model_data:
            recent_df = model_data["data_frame"].tail(10000)  # Use recent 10,000 entries
            accuracy = recent_df["accuracy"].mean()
            confidence = recent_df["confidence"].mean()
            avg_response_time = recent_df["response_time"].mean()
            max_response_time = recent_df["response_time"].max()
            contribution = recent_df["contribution"].mean()

            model_states.append([
                accuracy,
                confidence,
                avg_response_time,
                max_response_time,
                contribution
            ])

    # Ensure fixed-length state for 10 models
    max_models = 10
    num_features = 5  # Accuracy, Confidence, Avg Response, Max Response, Contribution
    while len(model_states) < max_models:
        model_states.append([0.0] * num_features)  # Padding with zeros

    # Truncate if more than 10 models (just in case)
    model_states = model_states[:max_models]

    # Input state (flattened input metrics)
    input_state = [
        len(data["input"]["file_name"]),  # Example input length
        data["input"]["image_height"],
        data["input"]["image_width"],
    ]

    # Ensemble state metrics
    ensemble_state_vector = [
        ensemble_state["total_energy_consumption"],
        ensemble_state["ensemble_size"],
    ]
    """
    # Combine and flatten state
    state = np.concatenate([
        np.array(ensemble_state_vector, dtype=np.float32),
        np.array(model_states, dtype=np.float32).flatten(),
        np.array(input_state, dtype=np.float32)
    ]) 
    """
    structured_state = {
        "ensemble_state": ensemble_state,
        "model_states": model_states,
        "input_state": input_state
    }


    return structured_state

def flatten_structured_state(structured_state):
    """
    Flattens the structured state dictionary into a single NumPy array.

    Args:
        structured_state (dict): State dictionary with keys:
            - "ensemble_state": dict with ensemble metrics.
            - "model_states": list of model-level metrics.
            - "input_state": list of input-level metrics.

    Returns:
        np.array: Flattened state array.
    """
    # Ensemble state as a vector
    ensemble_state_vector = [
        structured_state["ensemble_state"]["total_energy_consumption"],
        structured_state["ensemble_state"]["ensemble_size"]
    ]

    # Model states as a flattened vector
    model_states_vector = []
    for model in structured_state["model_states"]:
        model_states_vector.extend(model)  # Assuming model states are lists of 5 metrics

    # Input state as a vector
    input_state_vector = structured_state["input_state"]

    # Combine all parts into a single flattened array
    flattened_state = np.concatenate([
        np.array(ensemble_state_vector, dtype=np.float32),
        np.array(model_states_vector, dtype=np.float32),
        np.array(input_state_vector, dtype=np.float32)
    ])

    return flattened_state




In [5]:
def random_data_metrics():
    # returns data and inference metrics on a random image
    config_pipeline()

    model_count = 0
    # Prepare input data: 224-224
    data = copy.deepcopy(init_data)
    
    # SELECT THE INPUT IMAGE
    selected_key = random.choices(distribution_keys, weights=distribution_weights, k=1)[0]
    if selected_key in labels:
        df_file = data_file_label.get_group(selected_key)
        file_name = random.choice(df_file["file_name"].values)
    data["input"]["file_name"] = file_name
    data["label"] = selected_key
    
    
    # Execute services
    data = processing_service.execute(data)
    data = ensemble_service.execute(data)
    
    
    # Update model profiles
    for model_name, inferences in data["ml_inference"].items():
        model_count +=1 
        response_time = data["response_time"]["inference"].get(model_name, 0)

        i_label = data["label"]
        i_accuracy = 1 if i_label in inferences else 0
        i_confidence = inferences.get(i_label, 0)
        model_contribution = data["contribution"].get(model_name, 0)


        # Save to profile data
        data_dict = {
            "label": [i_label],
            "accuracy": [i_accuracy],
            "confidence": [i_confidence],
            "response_time": [response_time],
            "contribution": [model_contribution],
        }
        result_df = pd.DataFrame(data_dict)
        if model_name not in model_profile_data:
            model_profile_data[model_name] = {"data_frame": result_df}
        else:
            model_profile_data[model_name]["data_frame"] = pd.concat(
                [model_profile_data[model_name]["data_frame"], result_df], ignore_index=True
            )

    return data
    

In [9]:
import gym
from gym import spaces
import numpy as np

#substitute everything with get state

# ADD DISTRIBUTION WEIGHTS TO THE SIMULATION ENV

class SimulationEnv(gym.Env):

    def __init__(self):
        super(SimulationEnv, self).__init__()

        # Define constants
        self.state_size = 55  # Fixed state size: 55 features

        # Observation space: Fixed size of 55
        self.observation_space = spaces.Box(
            low=0.0, high=10.0, shape=(self.state_size,), dtype=np.float32
        )

        # Action space (example: add/remove/replace a model)
        self.action_space = spaces.Discrete(6)

    def reset(self):
        # Reset the environment to an initial state

        # Define Init Data:
        data = random_data_metrics()
        """config_pipeline()

        model_count = 0
        # Prepare input data: 224-224
        data = copy.deepcopy(init_data)
        
        # SELECT THE INPUT IMAGE
        selected_key = random.choices(distribution_keys, weights=distribution_weights, k=1)[0]
        if selected_key in labels:
            df_file = data_file_label.get_group(selected_key)
            file_name = random.choice(df_file["file_name"].values)
        data["input"]["file_name"] = file_name
        data["label"] = selected_key
        
        
        # Execute services
        data = processing_service.execute(data)
        data = ensemble_service.execute(data)
        
        
        # Update model profiles
        for model_name, inferences in data["ml_inference"].items():
            model_count +=1 
            response_time = data["response_time"]["inference"].get(model_name, 0)

            i_label = data["label"]
            i_accuracy = 1 if i_label in inferences else 0
            i_confidence = inferences.get(i_label, 0)
            model_contribution = data["contribution"].get(model_name, 0)


            # Save to profile data
            data_dict = {
                "label": [i_label],
                "accuracy": [i_accuracy],
                "confidence": [i_confidence],
                "response_time": [response_time],
                "contribution": [model_contribution],
            }
            result_df = pd.DataFrame(data_dict)
            if model_name not in model_profile_data:
                model_profile_data[model_name] = {"data_frame": result_df}
            else:
                model_profile_data[model_name]["data_frame"] = pd.concat(
                    [model_profile_data[model_name]["data_frame"], result_df], ignore_index=True
                )
        
        # end here"""

        self.current_state = get_state(data)  # Get initial state
        print(self.current_state)
        return flatten_structured_state(self.current_state)

    def step(self, action):
        
                
        if action == 0:
            apply_action("keep_ensemble")
        elif action == 1:
            apply_action("add_model")
        elif action == 2:
            apply_action("replace_model")


        config_pipeline()

        model_count = 0
        # Prepare input data
        data = copy.deepcopy(init_data)
 
        selected_key = random.choices(distribution_keys, weights=distribution_weights, k=1)[0]
        if selected_key in labels:
            df_file = data_file_label.get_group(selected_key)
            file_name = random.choice(df_file["file_name"].values)
        data["input"]["file_name"] = file_name

        data["label"] = selected_key

        # Execute services
        data = processing_service.execute(data)
        data = ensemble_service.execute(data)
        
        
        # Update model profiles
        for model_name, inferences in data["ml_inference"].items():
            model_count +=1 
            response_time = data["response_time"]["inference"].get(model_name, 0)
            
            i_label = data["label"]
            i_accuracy = 1 if i_label in inferences else 0
            i_confidence = inferences.get(i_label, 0)
            model_contribution = data["contribution"].get(model_name, 0)
            

            # Save to profile data
            data_dict = {
                "label": [i_label],
                "accuracy": [i_accuracy],
                "confidence": [i_confidence],
                "response_time": [response_time],
                "contribution": [model_contribution],
            }
            result_df = pd.DataFrame(data_dict)
            if model_name not in model_profile_data:
                model_profile_data[model_name] = {"data_frame": result_df}
            else:
                model_profile_data[model_name]["data_frame"] = pd.concat(
                    [model_profile_data[model_name]["data_frame"], result_df], ignore_index=True
                )
            #logging.info(model_name)
            #logging.info(result_df)
   
        # Get RL state and decide an action

        state = get_state(data)

        # Simulate a step in the environment
        self.current_state = state
        reward = self._calculate_reward(self.current_state)
        print("STATE:" , state)
        done = False  # Define termination condition if applicable
       
        return flatten_structured_state(self.current_state), reward, done, {}

    def _state_to_observation(self, state):
        # Convert the state dictionary to a flat vector for the RL agent
        return np.array(list(state["ensemble"].values()) + [model["accuracy"] for model in state["models"]])

    def _calculate_reward(self, state):
        model_accuracies = [model[0] for model in state.get("model_states", []) if model]  # Index 0 -> Accuracy
        
        # Reward is the mean accuracy; default to 0 if no accuracies exist
        if model_accuracies:
            accuracy_reward = np.mean(model_accuracies)
        else:
            accuracy_reward = 0.0

        # Debugging output
        print("Model Accuracies:", model_accuracies)
        print("Accuracy Reward:", accuracy_reward)

        return accuracy_reward

In [None]:
from stable_baselines3 import PPO

# Create the environment
env = SimulationEnv()

# Train the agent
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=100)

# Save the trained model
model.save("simulation_policy")


In [None]:
env = SimulationEnv()
state = env.reset()
action = 0  # Example action
next_state, reward, done, _ = env.step(action)
print("Next State:", next_state)
print("Reward:", reward)

"""
data before processing:{'input': {'file_name': 'n01484850_24355', 'image_height': 224, 'image_width': 224}, 'label': 'n01484850'} 
data_after_processing: {'input': {'file_name': 'n01484850_24355', 'image_height': 224, 'image_width': 224}, 'label': 'n01484850', 'response_time': {'processing': {'processing': 0.003390288033555108}, 'inference': {'InceptionResNetV2': 2.0996002099127553, 'MobileNetV2': 0.031032505692886653, 'ResNet50V2': 0.11703748376114145, 'ensemble': 2.108148874226313}}, 'ml_inference': {'InceptionResNetV2': {'n01491361': 0.5134068131446838}, 'MobileNetV2': {'n01491361': 0.3025656044483185}, 'ResNet50V2': {'n01491361': 0.6498599052429199}, 'ensemble': {'n01491361': 0.48861077427864075}}, 'explainability': {'InceptionResNetV2': 1, 'MobileNetV2': 0, 'ResNet50V2': 0, 'ensemble': 1}, 'contribution': {'InceptionResNetV2': 0.5134068131446838, 'MobileNetV2': 0.3025656044483185, 'ResNet50V2': 0.6498599052429199, 'ensemble': 0.6498599052429199}}
"""
