# 导入包和数据

In [1]:
import pandas
import numpy
import gurobipy as gp
import json

# Read the five JSON inputs. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform's default
# locale encoding.
def _load_json(path):
    """Return the parsed content of the JSON file at `path`."""
    with open(path, encoding="utf-8") as json_file:
        return json.load(json_file)


toy_data_dict = _load_json("sorted_ToyData_dict.json")
location = _load_json("location.json")
slot = _load_json("slot.json")
demand = _load_json("data_req.json")
bandwidth = _load_json("bandwidth.json")

# 初始化常量

In [2]:
# Problem dimensions: w jobs to schedule, J entries in the slot table.
w = len(toy_data_dict)
J = len(slot)

# L[k]: how many tasks job k runs in its first stage.
L = [len(job['stage']['1']) for job in toy_data_dict.values()]

# a[j]: slot count of data center j, in the order the slot table lists them.
a = list(slot.values())

# tasks_1[k]: the task names that make up stage 1 of job k.
tasks_1 = [job['stage']['1'] for job in toy_data_dict.values()]

# Normalize location: replace each raw string value by the integer that follows
# its two-character prefix. The result is used as a 1-based index (callers
# subtract 1 before indexing `bandwidth`).
# Values that are already integers are skipped, so re-running this cell -- or
# running it after update() has stored integer placements -- no longer raises.
for key, val in location.items():
    if isinstance(val, str):
        location[key] = int(val[2:])

# e[k][i]: execution time of the i-th stage-1 task of job k.
# The times are looked up in the order of tasks_1[k], so e[k][i] always refers
# to the same task as c[k][i] and tasks_1[k][i]. Walking the 'Execution Time'
# dict and filtering (the previous approach) only lined up when that dict
# happened to list the tasks in the same order as the stage list.
e = [
    [job['Execution Time'][task] for task in stage_tasks]
    for job, stage_tasks in zip(toy_data_dict.values(), tasks_1)
]
    
# c[k][i][j]: communication time of stage-1 task i of job k when it runs in
# data center j, i.e. the largest volume / bandwidth ratio over all inputs the
# task requires (0 when it has no inputs).
c = []
for job_tasks in tasks_1:
    job_rows = []
    for task in job_tasks:
        inputs = demand[task]
        row = []
        for dst in range(J):
            transfers = [
                volume / bandwidth[location[item] - 1][dst]
                for item, volume in inputs.items()
            ]
            # Leading 0 keeps the result at 0 when nothing is transferred.
            row.append(max([0, *transfers]))
        job_rows.append(row)
    c.append(job_rows)
    
# complete_time[k]: time at which the currently scheduled stage of job k ends.
# job_complete_time[k]: time at which job k finished its last stage.
complete_time = {}
job_complete_time = {}

# Jobs whose current stage ended in the latest release round.
complete_jobs = []

# Simulated clock.
global_time = 0

# stages[k]: stage job k is currently in (every job starts in stage 1).
# Sized from w instead of a literal six-element list so that any number of
# jobs in the input works.
stages = [1] * w

# total_stages[k]: highest stage number of job k. Taking the maximum avoids
# depending on the order in which the stage keys happen to be stored.
total_stages = [
    max(int(stage_id) for stage_id in job['stage'])
    for job in toy_data_dict.values()
]

# occ_slots[k]: 0-based data-center indices of the slots job k is holding.
occ_slots = {}

In [3]:
# Rescheduling thresholds read by release_resource():
# at most job_threshold jobs are released per round ...
job_threshold = 6
# ... and only those finishing within time_threshold of the earliest one.
time_threshold = 10

# 线性求解接口实现

In [4]:
def LP(K, L, avai, M):
    """Solve one placement round as a linear program.

    For every job k in K and every task i of its current stage, the model picks
    a data center j. Each (task, data center) pair has two variables that sum
    to 1; the pair contributes ``lambda0 + lambda1 * M ** (c[k][i][j] + e[k][i])``
    to the minimized objective, and ``X[k][i, j]`` mirrors ``lambda1``.

    Reads the notebook-level ``w``, ``J``, ``c`` and ``e``.

    Args:
        K: indices of the jobs to place in this round.
        L: L[k] is the number of tasks of job k to place.
        avai: avai[j] is the number of free slots in data center j.
        M: base of the exponential cost term.

    Returns:
        A flat list with one 1-based data-center index per task, ordered by
        job (in the order of K) and then by task index.

    Raises:
        RuntimeError: if the solver finishes without any solution (for example
            when the free slots cannot hold all tasks).
    """
    # define model
    m = gp.Model()

    # add variables; only the entries for jobs in K are populated
    x = [0] * w
    X = [0] * w
    for k in K:
        x[k] = m.addVars(L[k], J, 2, name="lambda" + str(k))
        X[k] = m.addVars(L[k], J, name="x" + str(k))

    # update model
    m.update()

    # set objective
    # NOTE(review): M ** (c + e) grows very quickly; the solver log already
    # warns about the coefficient range, so results may be numerically fragile.
    m.setObjective(
        gp.quicksum(
            x[k][i, j, 0] + x[k][i, j, 1] * pow(M, c[k][i][j] + e[k][i])
            for k in K for i in range(L[k]) for j in range(J)
        ),
        gp.GRB.MINIMIZE,
    )

    # X mirrors the "selected" half of each lambda pair
    m.addConstrs(X[k][i, j] == x[k][i, j, 1]
                 for k in K for i in range(L[k]) for j in range(J))
    # the two halves of a lambda pair sum to one
    m.addConstrs(x[k][i, j, 0] + x[k][i, j, 1] == 1
                 for k in K for i in range(L[k]) for j in range(J))
    # capacity: no data center receives more tasks than it has free slots
    m.addConstrs(gp.quicksum(X[k][i, j] for k in K for i in range(L[k]))
                 <= avai[j] for j in range(J))
    # every task is placed exactly once
    m.addConstrs(gp.quicksum(X[k][i, j] for j in range(J)) == 1
                 for k in K for i in range(L[k]))

    # solve it
    m.optimize()

    if m.SolCount == 0:
        raise RuntimeError(
            "LP placement has no solution (Gurobi status %s)" % m.Status
        )

    # Read the placement straight from the variable handles instead of parsing
    # variable names: a fixed name slice breaks once a job or task index has
    # two digits, and comparing a solver value with == 1.0 can drop a task
    # whose value comes back as 0.9999999. Taking the data center with the
    # largest value yields exactly one entry per task, in the order update()
    # walks them.
    res = []
    for k in K:
        for i in range(L[k]):
            chosen = max(range(J), key=lambda j: X[k][i, j].X)
            res.append(chosen + 1)
    return res

In [5]:
def update(K, avai, M):
    """Commit the placement of the bottleneck job and drop it from the pool.

    The bottleneck is the job in K owning the task with the largest
    communication + execution time under the latest LP result. Its stage
    completion time, occupied slots and task locations are recorded, it is
    removed from K, and the slots it took are subtracted from ``avai``.

    Reads the notebook-level ``res``, ``tasks_new``, ``c``, ``e``, ``J``, ``L``
    and ``global_time``; writes ``complete_time``, ``occ_slots`` and
    ``location``. K and avai are modified in place.

    Args:
        K: job indices still waiting for a placement (in the order LP used).
        avai: avai[j] is the number of free slots in data center j.
        M: ignored; the value is recomputed for the remaining jobs.

    Returns:
        (K, avai, M) with M = sum of J * L[k] over the remaining jobs.

    Raises:
        ValueError: if K is empty.
    """
    # find the task with the longest time
    # Starting from "no job yet" rather than job 0 keeps the choice inside K
    # even when every task has a zero duration.
    bottleneck = None
    maxim = float("-inf")
    num = 0
    for i in K:
        for j in range(len(tasks_new[i])):
            time = c[i][j][res[num] - 1] + e[i][j]
            if time > maxim:
                maxim = time
                bottleneck = i
            num += 1
    if bottleneck is None:
        if not K:
            raise ValueError("update() needs at least one job in K")
        # None of the jobs has a task: treat the first one as done immediately.
        bottleneck, maxim = K[0], 0
    complete_time[bottleneck] = global_time + maxim

    # find all tasks of the bottleneck job; res is walked with the same
    # running index as above, so tasks of other jobs still advance it
    occ_slots[bottleneck] = []
    setted = []
    num = 0
    for i in K:
        for task in tasks_new[i]:
            if i == bottleneck:
                setted.append(res[num] - 1)
                occ_slots[bottleneck].append(res[num] - 1)
                # outputs of this task now live where the task ran (1-based)
                location[task] = res[num]
            num += 1

    # remove the bottleneck job from the scheduling queue
    K.remove(bottleneck)

    # update M and available slots
    M = sum(J * L[i] for i in K)
    for i in setted:
        avai[i] = avai[i] - 1
    return K, avai, M

# 多轮调度接口实现

In [6]:
def release_resource():
    """Advance the clock to the next batch of finished stages and free their slots.

    Jobs are visited in ascending order of their recorded stage completion
    time. At most ``min(job_threshold, remaining jobs)`` of them are taken, and
    only while they finish no later than ``time_threshold`` after the earliest
    one; when a job falls outside that window the clock stops at
    ``earliest + time_threshold``.

    For every job in the batch the stage counter is advanced and its occupied
    slots are returned to ``avai``. Jobs that were in their last stage get
    their completion time stored in ``job_complete_time``, are deleted from
    ``complete_time`` and are not handed back for rescheduling.

    Works on the notebook-level ``complete_time``, ``job_complete_time``,
    ``complete_jobs``, ``stages``, ``total_stages``, ``occ_slots``, ``avai``
    and ``w``, all modified in place.

    Returns:
        (global_time, complete_jobs, stages, job_complete_time, avai, occ_slots)
        where global_time is the new clock value and complete_jobs holds the
        jobs whose next stage has to be scheduled.
    """
    # sort the jobs by their complete time
    complete_time_order = sorted(complete_time.items(), key=lambda item: item[1])

    complete_jobs.clear()
    # Start from the current clock so the function still returns a value when
    # nothing can be released.
    now = global_time
    if not complete_time_order:
        return now, complete_jobs, stages, job_complete_time, avai, occ_slots

    start = complete_time_order[0][1]
    remained_jobs = w - len(job_complete_time)
    limit = max(0, min(job_threshold, remained_jobs))

    # collect the batch according to the time and job thresholds
    for job, finish in complete_time_order[:limit]:
        if finish - start > time_threshold:
            now = start + time_threshold
            break
        now = finish
        complete_jobs.append(job)

    # Release slots and advance stages. Jobs that still have stages left are
    # gathered in a separate list: removing finished jobs from complete_jobs
    # while looping over it made the loop skip the following job, leaving that
    # job's stage unchanged and its slots occupied forever.
    unfinished = []
    for job in complete_jobs:
        if stages[job] == total_stages[job]:
            job_complete_time[job] = complete_time[job]
            del complete_time[job]
        else:
            unfinished.append(job)
        stages[job] += 1
        for slot_idx in occ_slots[job]:
            avai[slot_idx] += 1
        occ_slots[job].clear()
    complete_jobs[:] = unfinished

    return now, complete_jobs, stages, job_complete_time, avai, occ_slots

In [7]:
def set_next_iteration():
    """Prepare the scheduling inputs for the next stage of the released jobs.

    Rebuilds, in place, the notebook-level ``tasks_new``, ``L``, ``e`` and
    ``c`` so that they describe the current stage (``stages[k]``) of every job
    in ``complete_jobs`` and are empty / zero for all other jobs.

    Returns:
        (K, tasks_new, L, M, e, c) where K is a copy of complete_jobs and
        M = sum of J * L[k] over K.
    """
    K = complete_jobs.copy()

    # add new stages of completed jobs to the scheduling pool
    tasks_new.clear()
    for num, job in enumerate(toy_data_dict.values()):
        if num in complete_jobs:
            stage_tasks = job['stage'][str(stages[num])]
        else:
            stage_tasks = []
        tasks_new.append(stage_tasks)
        L[num] = len(stage_tasks)

    # reset M for new tasks to be scheduled
    M = sum(J * L[i] for i in K)

    # reset e for new tasks
    # Looked up in task order so that e[k][i] matches tasks_new[k][i] and
    # c[k][i]; filtering the 'Execution Time' dict only lined up when that dict
    # listed the tasks in the same order as the stage list.
    e.clear()
    for job, stage_tasks in zip(toy_data_dict.values(), tasks_new):
        e.append([job['Execution Time'][task] for task in stage_tasks])

    # reset c for new tasks: the largest volume / bandwidth ratio over the
    # inputs of each task, for every candidate data center
    c.clear()
    for stage_tasks in tasks_new:
        job_rows = []
        for task in stage_tasks:
            row = []
            for dst in range(J):
                maximum = 0
                for loc, volume in demand[task].items():
                    src = location[loc] - 1
                    time = volume / bandwidth[src][dst]
                    if time > maximum:
                        maximum = time
                row.append(maximum)
            job_rows.append(row)
        c.append(job_rows)

    return K, tasks_new, L, M, e, c

# 全过程调度

In [8]:
# First-round setup: every job is waiting to be scheduled.
K = list(range(w))

# Work on a copy so the slot counts kept in `a` stay untouched.
avai = list(a)

# M: J * L[k] summed over the jobs in K.
M = sum(J * L[job] for job in K)

# The first round schedules the stage-1 tasks (shallow copy of the outer list).
tasks_new = list(tasks_1)

In [9]:
# First round: each pass solves the LP for the jobs still in K, then update()
# commits the bottleneck job and removes it from K -- one job per pass.
for _ in range(w):
    res = LP(K, L, avai, M)
    K, avai, M = update(K, avai, M)

Restricted license - for non-production use only - expires 2022-01-13
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 229 rows, 312 columns and 624 nonzeros
Model fingerprint: 0xfdfcecbd
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 6e+14]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 5e+00]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Presolve removed 208 rows and 208 columns
Presolve time: 0.01s
Presolved: 21 rows, 104 columns, 208 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.0400000e+02   8.000000e+00   0.000000e+00      0s
       8    1.7322726e+09   0.000000e+00   0.000000e+00      0s

Solved in 8 iterations and 0.01 seconds
Optimal objective  1.732272583e+09
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread 

In [10]:
# Full run: while any job still has a stage in flight, release the stages that
# finished, load the next stage of those jobs, and place them one job per pass.
while complete_time:
    (global_time, complete_jobs, stages,
     job_complete_time, avai, occ_slots) = release_resource()
    K, tasks_new, L, M, e, c = set_next_iteration()
    for _ in range(len(complete_jobs)):
        res = LP(K, L, avai, M)
        K, avai, M = update(K, avai, M)

Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 256 rows, 351 columns and 702 nonzeros
Model fingerprint: 0xbd8c7347
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 1e+11]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 5e+00]
         Consider reformulating model or setting NumericFocus parameter
         to avoid numerical issues.
Presolve removed 234 rows and 234 columns
Presolve time: 0.01s
Presolved: 22 rows, 117 columns, 234 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.1700000e+02   9.000000e+00   0.000000e+00      0s
      11    1.4836697e+07   0.000000e+00   0.000000e+00      0s

Solved in 11 iterations and 0.01 seconds
Optimal objective  1.483669710e+07
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads


Optimize a model with 40 rows, 39 columns and 78 nonzeros
Model fingerprint: 0x059a630b
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 4e+03]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 4e+00]
Presolve removed 40 rows and 39 columns
Presolve time: 0.01s
Presolve: All rows and columns removed
Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    1.8100000e+02   0.000000e+00   0.000000e+00      0s

Solved in 0 iterations and 0.01 seconds
Optimal objective  1.810000000e+02
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (mac64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 121 rows, 156 columns and 312 nonzeros
Model fingerprint: 0xc47c4cda
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 9e+12]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 5e+00]
         Consider reformulating model or setting NumericFocus para

# 结果一览

In [11]:
# Completion time recorded for each job when its last stage was released, keyed by job index.
job_complete_time

{0: 2.0,
 1: 6.555555555555555,
 2: 9.805555555555555,
 3: 12.86111111111111,
 4: 21.210317460317462,
 5: 25.932539682539684}

In [12]:
# 1-based data-center index of every initial data item and of every task placed by update().
location

{'A1': 1,
 'A2': 4,
 'B1': 1,
 'B2': 3,
 'C1': 2,
 'C2': 6,
 'D1': 5,
 'D2': 7,
 'D3': 9,
 'E1': 4,
 'E2': 6,
 'E3': 8,
 'E4': 11,
 'F1': 9,
 'F2': 13,
 'F3': 10,
 'F4': 12,
 'F5': 13,
 'tE1': 3,
 'tC1': 2,
 'tF1': 9,
 'tB1': 6,
 'tD1': 13,
 'tD2': 7,
 'tA1': 4,
 'tA2': 4,
 'tF2': 1,
 'tF3': 10,
 'tF4': 1,
 'tE2': 3,
 'tE3': 3,
 'tB2': 8,
 'tC2': 6,
 'tD3': 5,
 'tD4': 7,
 'tC3': 2,
 'tF5': 6,
 'tF6': 1,
 'tE4': 1,
 'tE5': 3,
 'tD5': 2,
 'tF7': 12,
 'tF8': 6,
 'tF9': 9,
 'tE6': 3}