In [1]:
import os
import time
import numpy as np
import sys
import torch
import torch.optim as optim
import torch.nn.functional as F

# Set before the project imports below run. Presumably this silences TensorFlow's
# native (C++) logging — TODO confirm it is still needed by a transitive import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
# Add the directory three levels up to the module search path so the project
# imports below resolve. Presumably that is the repository root holding the
# `agora` and `cogito` packages — verify; the path is relative to the kernel's cwd.
sys.path.append('../../../')
from agora.DAG.algorithm.gnnDeepRL.agent import Agent
from cogito.machine import MachineConfig
from agora.DAG.utils.csv_reader import CSVReader
from agora.auxiliary.tools import average_completion, average_slowdown
from agora.DAG.adapter.episode import Episode
from agora.DAG.algorithm.gnnDeepRL.DRL import RLAlgorithm
from agora.DAG.algorithm.gnnDeepRL.reward_giver import EnergyOptimisationRewardGiverV2
from agora.DAG.utils.feature_functions import *
from agora.DAG.algorithm.gnnDeepRL.utils import ExperienceBuffer
# Import the CloudResourceGNN and train_gnn_ac functions
from agora.DAG.algorithm.gnnDeepRL.brain import CloudResourceGNN, train_gnn_ac

Using device: cuda


In [2]:
# Hide all CUDA devices from this process.
# NOTE(review): torch was already imported by the first cell, whose recorded output
# reads "Using device: cuda" — confirm this assignment still takes effect this late.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Seed NumPy and PyTorch so that repeated runs start from the same random state.
seed = 41
np.random.seed(seed)
torch.manual_seed(seed)

# ************************ Parameters Setting Start ************************
machines_number = 5
# Number of jobs requested from the CSV reader per chunk. This used to be declared
# as 10 here and then silently overwritten with 5 further down the cell, so 5 has
# always been the effective value; it is now declared exactly once.
jobs_len = 5
n_iter = 30
jobs_csv = '../jobs_files/batch_task.csv'

reward_giver = EnergyOptimisationRewardGiverV2()

name = f'GNN-{machines_number}'
model_dir = f'./agents/{name}'
summary_path = f'./Tensorboard/{name}'

# Exploration schedule passed to RLAlgorithm by the training loop.
epsilon = 1.0  # Start with full exploration
epsilon_min = 0.01  # Minimum exploration
epsilon_decay = 0.995  # Epsilon decay rate per step
# ************************ Parameters Setting End ************************

# Create the checkpoint directory; a no-op when it already exists.
os.makedirs(model_dir, exist_ok=True)

csv_reader = CSVReader(jobs_csv)
machine_configs = [
    MachineConfig(240, 1, 1, "ThinkSystem-SR850-V3", 1900),
    MachineConfig(8, 1, 1, "HPProLiant-ML30-Gen11", 3200),
    MachineConfig(80, 1, 1, "FALINUX-AnyStor-700EC-NM", 3000),
    MachineConfig(64, 1, 1, "Dell-PowerEdgeHS5610", 2100),
    MachineConfig(120, 1, 1, "FusionServer-1288H-V7", 1900),
]

# NOTE(review): wildcard import kept on purpose. The training loop calls
# update_parameters, which no visible cell imports explicitly, so it presumably
# comes from this module — verify before narrowing this to explicit names.
from agora.DAG.algorithm.gnnDeepRL.utils import *

# Initialize GNN model
node_feature_dim = 5  # Adjust based on your DAG node features
resource_feature_dim = 35  # Adjust based on your resource features
hidden_dim = 64
action_dim = len(machine_configs)  # Number of possible actions (machines to allocate)

# Buffer and agent shared by every job chunk and iteration of the training loop.
experience_buffer = ExperienceBuffer(100000)
agent = Agent("gnn", 0.95, reward_to_go=True, nn_baseline=True, normalize_advantages=True,
              experience_buffer=experience_buffer)

# The main training loop lives in a later cell.

length of dataframe:  98434


  df.instances_num = df.inst_num.astype(dtype=int)


In [3]:
pip show torch

Name: torch
Version: 2.4.1+cu124
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: C:\Users\AH274303\AppData\Local\anaconda3\envs\CloudEnvironement\Lib\site-packages
Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions
Required-by: torchaudio, torchvision
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Import sanity check. Both modules are already imported in the first cell, so
# re-importing them here is a no-op that only confirms they load in this kernel.
import torch
import torch.nn.functional as F
print("PyTorch imported successfully!")

PyTorch imported successfully!


In [None]:

# Episodes rolled out per iteration before each parameter update. Also used as the
# divisor for the per-episode timing printed below.
n_episodes = 10


def _trajectory_to_experiences(trajectory):
    """Convert one episode trajectory into replay-buffer experience tuples.

    Args:
        trajectory: sequence of steps, each exposing ``observation``, ``action``
            and ``reward`` attributes.

    Returns:
        A list of ``(observation, action, reward, next_observation, done)`` tuples
        in step order. The next observation of step ``i`` is the observation of
        step ``i + 1``. The last step has no successor, so it reuses its own
        observation and is the only step flagged with ``done = 1``.
    """
    last_index = len(trajectory) - 1
    experiences = []
    for i, node in enumerate(trajectory):
        if i < last_index:
            next_observation = trajectory[i + 1].observation
            done = 0
        else:
            next_observation = node.observation
            done = 1
        experiences.append((node.observation, node.action, node.reward, next_observation, done))
    return experiences


for job_chunk in range(0, 15):
    # Each chunk asks the reader for jobs_len jobs, moving the first argument
    # forward by jobs_len per chunk (presumably an offset/count pair — confirm
    # against CSVReader.generate).
    jobs_configs = csv_reader.generate(jobs_len * job_chunk, jobs_len)
    # A fresh algorithm per chunk, so the exploration parameters start again from
    # the configured epsilon for every chunk; the agent itself is shared.
    algorithm = RLAlgorithm(agent, reward_giver, features_normalize_func=features_normalize_func,
                            features_extract_func=features_extract_func, epsilon=epsilon, epsilon_min=epsilon_min,
                            epsilon_decay=epsilon_decay)

    for itr in range(n_iter):
        print(f"********** Iteration {itr} ************")
        trajectories = []
        makespans = []
        average_completions = []
        average_slowdowns = []

        tic = time.time()
        # Collect trajectories
        for e in range(n_episodes):
            print(f"********** Episode {e} ************")

            episode = Episode(machine_configs, jobs_configs, algorithm, None)
            algorithm.reward_giver.attach(episode.simulation)
            episode.run()
            trajectories.append(episode.simulation.scheduler.algorithm.current_trajectory)
            makespans.append(episode.simulation.env.now)
            average_completions.append(average_completion(episode))
            average_slowdowns.append(average_slowdown(episode))

            # Persist the embeddings logged by the algorithm as one array per episode.
            embeddings_log = algorithm.get_embeddings_log()
            all_embeddings = np.concatenate(embeddings_log, axis=0)

            # Create directory structure
            dir_path = f'job_chunk_{job_chunk}/iteration_{itr}'
            os.makedirs(dir_path, exist_ok=True)

            # Save embeddings to a file
            file_path = os.path.join(dir_path, f'embeddings_episode_{e}.npy')
            print(f'file_path {file_path}')
            np.save(file_path, all_embeddings)

        # Push every step of every collected trajectory into the replay buffer,
        # in collection order.
        for trajectory in trajectories:
            for experience in _trajectory_to_experiences(trajectory):
                agent.experience_buffer.add(experience)

        update_parameters(agent)
        toc = time.time()
        # Wall-clock time of the iteration (rollouts plus update) averaged over the
        # episodes. The divisor used to be a hard-coded 12 that did not match the
        # 10 episodes actually collected.
        print(f"Iteration {itr}, Time: {(toc - tic) / n_episodes:.2f}")
        print(f"Average Makespan: {np.mean(makespans):.2f}, "
              f"Avg Completion: {np.mean(average_completions):.2f}, "
              f"Avg Slowdown: {np.mean(average_slowdowns):.2f}")

        # Save the model periodically
        if (itr + 1) % 10 == 0:
            torch.save(agent.actor.state_dict(), f'{model_dir}/actor_model_iter_{itr + 1}.pth')
            torch.save(agent.critic.state_dict(), f'{model_dir}/critic_model_iter_{itr + 1}.pth')

# Save the final trained model.
# The actor line previously read `agent.actor.sta-te_dict()`, which parses as a
# subtraction and would have failed here, after all training had finished.
torch.save(agent.actor.state_dict(), f'{model_dir}/actor_model_final.pth')
torch.save(agent.critic.state_dict(), f'{model_dir}/critic_model_final.pth')

Length of job_configs is  5
Jobs number:  5
Tasks number: 17
Task instances number mean:  204.1764705882353
Task instances number std 744.2816699020938
Task instances cpu mean:  96.08182080092192
Task instances cpu std:  13.43714373361451
Task instances memory mean:  0.37806107749927975
Task instances memory std:  0.04604136482569786
Task instances duration mean:  124.30452319216364
Task instances duration std:  40.37536751800922
********** Iteration 0 ************
********** Episode 0 ************
Epsilon min:  0.01451687645335