<a href="https://colab.research.google.com/github/Mohammadhosseinkarimi/DQN_V1/blob/main/DQN_V1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# first i create my envirement
# i use petting zoo to create envirement for task scheduler
# so first install petting zoo
!pip install pettingzoo

Collecting pettingzoo
  Downloading pettingzoo-1.25.0-py3-none-any.whl.metadata (8.9 kB)
Downloading pettingzoo-1.25.0-py3-none-any.whl (852 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m852.5/852.5 kB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pettingzoo
Successfully installed pettingzoo-1.25.0


In [2]:
from pettingzoo.utils.env import AECEnv
from gymnasium import spaces
import numpy as np

# Environment dimensions used by TaskSchedulingEnv to size its observation and
# action spaces.
NUM_MACHINES = 2  # number of machines; one agent named "agent_<i>" is created per machine
NUM_RESOURCES = 2  # e.g., CPU, RAM
MAX_TASKS = 5  # tasks in queue
MAX_RESOURCE = 10  # initial amount of every resource on every machine; also the observation upper bound

class TaskSchedulingEnv(AECEnv):
    """Turn-based (AEC) multi-agent environment that assigns queued tasks to machines.

    There is one agent per machine (``agent_0`` ... ``agent_{NUM_MACHINES-1}``).
    Agents act in a fixed round-robin order. On its turn an agent either does
    nothing (action ``0``) or tries to pull the task at 1-based queue position
    ``k`` (action ``k``) onto its own machine. A task is accepted only if the
    machine still has at least the requested amount of every resource; an
    accepted task is removed from the queue, its resources are subtracted from
    the machine, and the acting agent receives a reward of 1. Any other outcome
    yields a reward of 0.

    Resources are only ever consumed (nothing in this class releases them) and
    a task's ``duration`` is generated and exposed in the observation but is
    not otherwise used here. Consequently the episode is finished as soon as
    the queue is empty or no remaining task fits on any machine; at that point
    every agent is marked as terminated.

    Typical PettingZoo loop::

        env.reset(seed=0)
        for agent in env.agent_iter():
            obs, reward, terminated, truncated, info = env.last()
            action = None if terminated or truncated else policy(obs)
            env.step(action)
    """

    metadata = {"render_modes": ["human"], "name": "task_scheduler_v0"}

    # Source of randomness for task generation. Defaults to NumPy's global
    # legacy generator (so ``np.random.seed`` keeps working as before) and is
    # replaced by a dedicated ``RandomState`` when ``reset`` receives a seed.
    _rng = np.random

    def __init__(self):
        """Create the agents and their observation/action spaces, then reset."""
        super().__init__()
        self.possible_agents = [f"agent_{i}" for i in range(NUM_MACHINES)]
        self.agents = self.possible_agents[:]

        # Observation layout: this machine's free resources, followed by
        # MAX_TASKS slots of (resource demands..., duration); unused slots are 0.
        obs_size = NUM_RESOURCES + MAX_TASKS * (NUM_RESOURCES + 1)
        self.observation_spaces = {
            agent: spaces.Box(low=0, high=MAX_RESOURCE, shape=(obs_size,), dtype=np.int32)
            for agent in self.possible_agents
        }

        # Action space: select task index or 0 for "no action"
        self.action_spaces = {
            agent: spaces.Discrete(MAX_TASKS + 1)
            for agent in self.possible_agents
        }

        self.reset()

    def observation_space(self, agent):
        """Return the observation space of ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space of ``agent``."""
        return self.action_spaces[agent]

    def reset(self, seed=None, options=None):
        """Start a new episode with full machines and a freshly generated queue.

        Args:
            seed: Optional integer. When given, task generation is re-seeded so
                the episode (and following unseeded resets) is reproducible.
                When ``None`` the current random source is left untouched.
            options: Accepted for API compatibility; not used.
        """
        if seed is not None:
            self._rng = np.random.RandomState(seed)

        self.agent_idx = 0
        self.agents = self.possible_agents[:]
        self.resources = np.full((NUM_MACHINES, NUM_RESOURCES), MAX_RESOURCE)
        self.tasks = self._generate_tasks()
        self.task_queue = self.tasks.copy()
        self._agent_selector = iter(self.agents)
        self.agent_selection = next(self._agent_selector)

        self.rewards = {agent: 0 for agent in self.agents}
        # Required by AECEnv.last(), which reports the reward accumulated by an
        # agent since it last acted.
        self._cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.truncations = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}

    def observe(self, agent):
        """Return the int32 observation vector for ``agent``.

        The vector holds the free resources of the agent's machine followed by
        ``(resources..., duration)`` for every queued task, zero-padded to
        ``MAX_TASKS`` task slots.
        """
        i = int(agent.split('_')[1])
        flat_resources = self.resources[i].tolist()
        flat_tasks = []
        for task in self.task_queue:
            flat_tasks += list(task['resources']) + [task['duration']]
        # Zero-fill the slots of tasks that have already been scheduled.
        missing = MAX_TASKS * (NUM_RESOURCES + 1) - len(flat_tasks)
        if missing > 0:
            flat_tasks += [0] * missing
        return np.array(flat_resources + flat_tasks, dtype=np.int32)

    def _generate_tasks(self):
        """Draw ``MAX_TASKS`` random tasks.

        Each task is a dict with ``"resources"`` (one demand per resource, drawn
        from ``[1, MAX_RESOURCE // 2)``) and ``"duration"`` (drawn from
        ``[1, 5)``). Values are stored as plain Python ints.
        """
        return [
            {
                "resources": self._rng.randint(1, MAX_RESOURCE // 2, size=NUM_RESOURCES).tolist(),
                "duration": int(self._rng.randint(1, 5)),
            }
            for _ in range(MAX_TASKS)
        ]

    def _fits(self, machine, task):
        """Return True if ``task`` fits into the free resources of ``machine``."""
        return all(
            task["resources"][j] <= self.resources[machine][j]
            for j in range(NUM_RESOURCES)
        )

    def _has_schedulable_task(self):
        """Return True if at least one queued task still fits on some machine."""
        return any(
            self._fits(machine, task)
            for task in self.task_queue
            for machine in range(NUM_MACHINES)
        )

    def step(self, action):
        """Apply ``action`` for the currently selected agent and pass the turn.

        Args:
            action: ``0`` for "no action", or ``k`` in ``1..len(queue)`` to try
                to schedule the k-th queued task on the agent's machine.
                Out-of-range integers are treated as unsuccessful attempts.
                ``None`` is accepted only for an agent that is already
                terminated/truncated (PettingZoo's "dead step"), in which case
                the agent is removed from ``self.agents``.
        """
        agent = self.agent_selection

        # PettingZoo convention: finished agents are stepped with None so the
        # base class can remove them and end ``agent_iter()``.
        if action is None and (self.terminations[agent] or self.truncations[agent]):
            self._was_dead_step(None)
            return

        i = int(agent.split('_')[1])

        reward = 0
        if action > 0 and action <= len(self.task_queue):
            task = self.task_queue[action - 1]
            if self._fits(i, task):
                self.resources[i] -= np.asarray(task["resources"])
                self.task_queue.pop(action - 1)
                reward = 1  # reward for successful scheduling

        # The acting agent has already seen its accumulated reward via last();
        # start a fresh accumulation for it.
        self._cumulative_rewards[agent] = 0
        # Only the acting agent earns a reward this step; clear stale values so
        # they are not counted twice when accumulating.
        for name in self.rewards:
            self.rewards[name] = 0
        self.rewards[agent] = reward
        for name, value in self.rewards.items():
            self._cumulative_rewards[name] += value

        # Resources are never released, so once nothing fits (or the queue is
        # empty) no further progress is possible: end the episode for everyone.
        if not self._has_schedulable_task():
            for name in self.agents:
                self.terminations[name] = True

        # Round-robin turn order, wrapping around after the last agent.
        try:
            self.agent_selection = next(self._agent_selector)
        except StopIteration:
            self._agent_selector = iter(self.agents)
            self.agent_selection = next(self._agent_selector)

    def render(self):
        """Print every machine's free resources and the remaining task queue."""
        for i, res in enumerate(self.resources):
            print(f"Machine {i} resources: {res}")
        print(f"Queue: {self.task_queue}")



ModuleNotFoundError: No module named 'pettingzoo'