In [1]:
# coding=utf-8
import time
import numpy as np
import tensorflow as tf
from multiprocessing import Process, Manager
import sys
import warnings
from data.loader import InstanceConfigLoader
from framework.instance import InstanceConfig
from framework.machine import MachineConfig
from framework.episode import Episode
from framework.trigger import ThresholdTrigger
from framework.DRL.agent import Agent
from framework.DRL.DRL import RLAlgorithm
from framework.DRL.policynet import PolicyNet
from framework.DRL.reward_giver import AverageCompletionRewardGiver, MakespanRewardGiver
from framework.DRL.utils import features_extract_func, features_normalize_func, multiprocessing_run

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Seed both NumPy and TensorFlow with the same value so repeated runs of the
# notebook start from the same random state.
random_seed = 41
np.random.seed(random_seed)
tf.random.set_random_seed(random_seed)

# Input files describing the workload: per-instance CPU request, the machine
# each instance is initially placed on, and the folder with CPU utilisation
# traces (meaning inferred from the variable names — TODO confirm with loader).
vm_cpu_request_file = '/hhd/sxy/cloudsim-alibaba2017/resources/configuration/instance_plan_cpu_100.csv'
vm_machine_id_file = '/hhd/sxy/cloudsim-alibaba2017/resources/configuration/instance_machine_id_100.csv'
vm_cpu_utils_folder = '/hhd/sxy/cloudsim-alibaba2017/resources/workload/alibaba2017/instance_all'

# Simulated cluster: one MachineConfig per machine id, each built with 64 as
# its second argument (presumably the CPU capacity — TODO confirm).
machines_number = 1313
# Earlier four-argument variant, kept for reference:
# machine_configs = [MachineConfig(i, 64, 1e9, 1e9) for i in range(machines_number)]
machine_configs = [
    MachineConfig(machine_id, 64)
    for machine_id in range(machines_number)
]

# Loader for the instance (VM) configurations built from the three inputs above.
instance_configs = InstanceConfigLoader(
    vm_cpu_request_file,
    vm_machine_id_file,
    vm_cpu_utils_folder,
)

In [None]:
import os

# Restrict TensorFlow to the first GPU. This is done before any network or
# agent object is built so the restriction is already in place whenever the
# CUDA runtime ends up being initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# ************************ Parameters Setting Start ************************
n_iter = 100     # number of policy-update iterations
n_episode = 2    # episodes simulated in parallel during each iteration

policynet = PolicyNet(5)
reward_giver = MakespanRewardGiver(-1)
# features_extract_func / features_normalize_func are used exactly as imported
# from framework.DRL.utils; no rebinding is needed.

name = '%s-%s-m%d' % (reward_giver.name, policynet.name, machines_number)
model_dir = './agents/%s' % name
# ************************ Parameters Setting End ***********************

# Make sure the checkpoint directory exists before the agent is created.
os.makedirs(model_dir, exist_ok=True)

agent = Agent(name, policynet, 1, reward_to_go=True, nn_baseline=True, normalize_advantages=True,
              model_save_path='%s/model.ckpt' % model_dir)

for itr in range(n_iter):
    tic = time.time()
    print("******************** Iteration %i ********************" % itr)

    # The manager server process only has to live while the workers run; the
    # context manager shuts it down deterministically at the end of the block.
    with Manager() as manager:
        shared_trajectories = manager.list()
        shared_makespans = manager.list()

        # A complete simulation using the whole dataset
        processes = []
        for i in range(n_episode):
            print("********* Episode %i *********" % i)
            # The samples are different
            algorithm = RLAlgorithm(agent, reward_giver, features_extract_func=features_extract_func,
                                    features_normalize_func=features_normalize_func)
            trigger = ThresholdTrigger()
            print("trigger has worked!")
            episode = Episode(machine_configs, instance_configs, trigger, algorithm, None)
            algorithm.reward_giver.attach(episode.simulation)
            print("reward giver has worked!")
            processes.append(Process(target=multiprocessing_run,
                                     args=(episode, shared_trajectories, shared_makespans)))

        # Start every worker first, then wait for all of them, so the
        # episodes actually run concurrently.
        for p in processes:
            p.start()
        for p in processes:
            p.join()

        # Copy the results into ordinary lists while the manager is still
        # alive; the proxies become unusable once the block exits.
        trajectories = list(shared_trajectories)
        makespans = list(shared_makespans)

    # A worker that died contributes no trajectory; surface that instead of
    # silently training on a smaller batch.
    failed_exit_codes = [p.exitcode for p in processes if p.exitcode != 0]
    if failed_exit_codes:
        warnings.warn("%d of %d episode processes exited abnormally (exit codes: %s)"
                      % (len(failed_exit_codes), len(processes), failed_exit_codes))

    # Computed once and reused for both the log entry and the console line.
    mean_makespan = np.mean(makespans)
    agent.log('makespan', mean_makespan, agent.global_step)

    toc = time.time()

    print("Mean of makespans:", mean_makespan, "Duration:", toc - tic)

    # Split every trajectory into parallel per-episode lists of observations,
    # actions and rewards.
    all_observations = [[node.observation for node in trajectory] for trajectory in trajectories]
    all_actions = [[node.action for node in trajectory] for trajectory in trajectories]
    all_rewards = [[node.reward for node in trajectory] for trajectory in trajectories]

    all_q_s, all_advantages = agent.estimate_return(all_rewards)

    # Different models for each iteration
    agent.update_parameters(all_observations, all_actions, all_advantages)

# The checkpoint is written once, after the final iteration.
agent.save()


******************** Iteration 0 ********************
********* Episode 0 *********
trigger has worked!
reward giver has worked!
********* Episode 1 *********
trigger has worked!
reward giver has worked!
At 600 scheduler was triggered!
AtMachines are over utilized:  [248]600
 Instances to reschedule:scheduler was triggered! 
[70]Machines are over utilized:
 [248]
Instances to reschedule: [70]
Instance Instance70  70has been scheduled to Machine  has been scheduled to Machine865 159

Env time: 43201
Env time: 43201
Mean of makespans: 43201.0 Duration: 0.1849806308746338
******************** Iteration 1 ********************
********* Episode 0 *********
trigger has worked!
reward giver has worked!
********* Episode 1 *********
trigger has worked!
reward giver has worked!
At 600 Atscheduler was triggered! 
600Machines are over utilized:  scheduler was triggered![248]

Machines are over utilized:Instances to reschedule:  [248][70]

Instances to reschedule: [70]
Instance Instance70  70has b