In [3]:
pip install stable-baselines3 gymnasium numpy torch

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (

In [None]:
import gymnasium as gym
import numpy as np
import random
from gymnasium import spaces

class AdaptiveMixedCriticalityEnv(gym.Env):
    
    #Adaptive Mixed-Criticality scheduling environment with dynamic budget adjustment.
  
    def __init__(self):
        super(AdaptiveMixedCriticalityEnv, self).__init__()

        # Define tasks as [Execution Time, Deadline, Criticality Level, Budget]
        self.tasks = [
            [2, 5, 1, 2],  # Low-criticality task (LC)
            [3, 7, 1, 3],  # Low-criticality task (LC)
            [4, 6, 2, 4],  # High-criticality task (HC)
            [1, 4, 2, 1]    # High-criticality task (HC)
        ]

        self.state = np.zeros(len(self.tasks))  # Task completion state
        self.time = 0  # Simulation clock
        self.high_criticality_mode = False  # Mode flag

        # Observation space: time + task completion status + mode flag
        self.observation_space = spaces.Box(low=0, high=10, shape=(len(self.tasks) + 2,), dtype=np.float32)

        # Action space: Selecting a task to execute
        self.action_space = spaces.Discrete(len(self.tasks))

    def step(self, action):
       
        #Perform an action (execute a task), adjust budgets if needed, and return next state.
        
        exec_time, deadline, crit_level, budget = self.tasks[action]

        # Check if task exceeds its budget (overrun)
        if exec_time > budget:
            if crit_level == 1:  # Low-criticality task
                budget = max(1, budget - 1)  # Reduce budget dynamically
                reward = -5  # Penalty for exceeding budget
            else:  # High-criticality task
                self.high_criticality_mode = True  # Enter High-Criticality Mode
                reward = -10  # Heavy penalty for HC task overrun
        else:
            reward = 10 if crit_level == 2 else 5  # Reward for completing tasks normally

        # Mode Switching: Reduce low-criticality execution in High-Criticality Mode
        if self.high_criticality_mode and crit_level == 1:
            reward -= 3  # Penalty for LC task execution in High-Criticality Mode

        # Update system state
        self.state[action] = 1  # Task completed
        self.time += exec_time

        # Check if all tasks are completed or deadline is exceeded
        done = np.all(self.state == 1) or self.time > max(d for _, d, _, _ in self.tasks)

        # Observation includes current time, task completion status, and mode flag
        obs = np.append(self.state, [self.time, int(self.high_criticality_mode)])

        return obs, reward, done, False, {}

    def reset(self, seed=None, options=None):
        #Reset the environment to the initial state.
        
        self.state = np.zeros(len(self.tasks))
        self.time = 0
        self.high_criticality_mode = False
        return np.append(self.state, [self.time, int(self.high_criticality_mode)]), {}

    def render(self):
       
        #Print the current state for debugging.
        mode = "High-Criticality Mode" if self.high_criticality_mode else "Normal Mode"
        print(f"Time: {self.time}, Mode: {mode}, Task Status: {self.state}")


In [None]:
from stable_baselines3 import DQN

# Create environment
env = AdaptiveMixedCriticalityEnv()

# Create and train DQN model
model = DQN("MlpPolicy", env, verbose=1, learning_rate=0.001, buffer_size=10000, batch_size=32, exploration_fraction=0.1)
model.learn(total_timesteps=10000)

# Save the trained model
model.save("dqn_amc_dynamic_scheduler")
print("Model trained and saved successfully.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    learning_rate    | 0.001    |
|    loss             | 0.00592  |
|    n_updates        | 861      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.89     |
|    ep_rew_mean      | 48.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 748      |
|    fps              | 1053     |
|    time_elapsed     | 3        |
|    total_timesteps  | 3566     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.00414  |
|    n_updates        | 866      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.89     |
|    ep_rew_mean      | 48.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 752      |
|    fps              | 1

In [5]:
# Load trained model
model = DQN.load("dqn_amc_dynamic_scheduler")

# Test environment with trained model
obs, _ = env.reset()
done = False

while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, _, _ = env.step(action)
    env.render()  # Print task execution status and mode changes

Time: 4, Mode: Normal Mode, Task Status: [0. 0. 1. 0.]
Time: 5, Mode: Normal Mode, Task Status: [0. 0. 1. 1.]
Time: 6, Mode: Normal Mode, Task Status: [0. 0. 1. 1.]
Time: 7, Mode: Normal Mode, Task Status: [0. 0. 1. 1.]
Time: 8, Mode: Normal Mode, Task Status: [0. 0. 1. 1.]
