In [3]:
import numpy as np
import random
def generate_state(I_citys, L_levels, W_workdays, M_servers, x_max_task_num, H_home_of_server, lambd):
    """
    生成一个系统的随机状态，该系统负责将任务分配给员工。
    
    参数:
    I_citys (int): 城市的数量。
    L_levels (int): 等级的数量。
    W_workdays (int): 员工距离放假的最大工作天数, 一般为7。
    M_servers (int): 员工的数量。
    x_max_task_num (int): 每个城市的最大任务数(本函数中未直接使用)。
    H_home_of_server (list[int]): 代表每个员工家所在城市的列表。
    lambd (np.array): 一个二维数组 (I_citys x_max_task_num L_levels)，代表每个城市和等级的任务到达率。
    
    返回:
    tuple: 包含任务分布矩阵和员工状态列表的元组。

    城市中的任务矩阵，员工状态列表[(员工所在地，距离放假时间)]
    """

    # 生成任务分布矩阵 (n_il)，其维度为 I_citys x_max_task_num L_levels  
    n_il = np.zeros((I_citys, L_levels), dtype=int)  # 用零初始化矩阵
    for i_city in range(I_citys):  # 遍历城市
        for l_level in range(L_levels):  # 遍历等级
            # 为每个城市和等级分配一个基于泊松分布的随机数, 表示随机状态生成的任务数量？# TODO
            n_il[i_city, l_level] = np.random.poisson(lambd[i_city, l_level])
    S0_tasks = n_il  # 任务分布矩阵 (I_citys x_max_task_num L_levels), [0, +∞)
    
    # 初始化一个列表来保存每个员工的状态，给
    S1_servers = []
    for m_server in range(M_servers):  # 遍历所有员工

        # 为员工 'm_server' 随机选择距离放假的工作日数，取值范围 [0, W_workdays]
        w_m = np.random.randint(0, W_workdays + 1)

        # 根据距离放假天数w_m，得到位置i_m，如果距离放假0，则在家里，否则随机一个位置。
        # i_m [1, I_citys]
        if w_m == W_workdays:  # 如果距离放假时间为 0 天，即今天放假，则员工所在城市为家所在城市
            i_m = H_home_of_server[m_server]
        else:  # 否则，为员工随机选择一 个非家乡城市工作
            i_m = np.random.randint(1, I_citys + 1)
            
        # 将员工的状态作为元组（城市，距离放假的工作日数）添加到列表中
        S1_servers.append((i_m, w_m))
    
    # 将任务分布矩阵和员工状态列表合并成一个状态元组
    S = (S0_tasks, S1_servers)    # ((I_citys x_max_task_num L_levels), (M_servers x_max_task_num 1))

    return S  # 返回生成的状态

In [4]:
I_citys = 26
L_levels = 5
W_workdays = 6
M_servers = 40
x_max_task_num = 2
H_home_of_server = [1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10,]
lambd = np.random.rand(I_citys, L_levels)
state = generate_state(I_citys, L_levels, W_workdays, M_servers, x_max_task_num, H_home_of_server, lambd)
print(state)

(array([[0, 1, 1, 1, 1],
       [0, 3, 1, 3, 1],
       [0, 1, 1, 3, 2],
       [0, 0, 2, 0, 1],
       [0, 1, 0, 2, 0],
       [0, 0, 0, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 2, 1, 0],
       [0, 0, 1, 1, 1],
       [0, 0, 1, 1, 1],
       [1, 0, 0, 2, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0],
       [0, 2, 0, 1, 0],
       [1, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [2, 0, 1, 0, 2],
       [2, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0],
       [1, 0, 0, 3, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 2, 1, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1],
       [0, 0, 0, 1, 1]]), [(14, 5), (26, 1), (19, 1), (22, 2), (6, 4), (10, 4), (7, 2), (5, 2), (17, 3), (22, 4), (1, 6), (18, 5), (14, 1), (3, 3), (17, 4), (22, 1), (9, 0), (26, 0), (3, 5), (21, 1), (23, 2), (13, 1), (23, 3), (15, 2), (24, 2), (17, 3), (24, 1), (15, 3), (22, 4), (15, 2), (8, 0), (23, 4), (26, 5), (26, 5), (21, 5), (26, 0), (3, 0), (10, 5), (21, 3), (26, 0)])


In [5]:
def aggreg_state(S, Z_cluster_num, X, M_servers, I_citys, L_levels):
    # 函数2
    # 定义一个函数，用于根据给定的参数将复杂的状态 S 压缩成一个简化的状态 barS。
    # 输入:
    #   S: 当前状态，一个复杂的结构，包含两部分信息：
    #      - 一个数组，表示每个城市每个等级的数量(n_il)。
    #      - 一个列表，表示服务员和他们服务的城市及工作日(i_m, w_m)
    #   Z_cluster_num: 一个整数，表示将城市分成多少个聚类。
    #   X: 一个整数，用于计算 N 矩阵中的元素值。
    #   M_servers: 服务员的总数。
    #   I_citys: 城市的总数。
    #   L_levels: 等级的总数。
    # 输出:
    #   barS: 一个元组，表示压缩后的状态，包含以下三个部分：
    #         - N: 一个二维数组，表示每个聚类的等级之和。
    #         - g: 一个数组，表示每个聚类的状态。
    #         - w: 一个整数，表示第一个服务员的工作日数。

    # 计算正在工作的服务员的数量, S[1][m_server][1]即S1_servers[m_server][1]即w_m
    barM = np.sum([1 for m_server in range(M_servers) if S[1][m_server][1] != 0]) # 距离放假时间不等于0
    # 根据城市数量和设定的簇数，将城市分成Z个簇
    cluster = split_list(I_citys, Z_cluster_num)
    # 计算实际的簇数，考虑到可能会有余数
    # num_cluster = divide_reminder(I_citys, Z_cluster_num) # 这个可以替换为下面的
    num_cluster = np.ceil(I_citys / Z_cluster_num).astype(int) # 向上取整
    # 初始化表示各簇状态的数组g
    g = np.zeros(num_cluster)
    
    # 压缩状态的第二部分：计算每个簇的状态
    for z_cluster in range(num_cluster):
        # 统计每个簇中有多少业务员正在工作 
        e_z = np.sum([1 for m_server in range(M_servers) if S[1][m_server][0] in cluster[z_cluster]])
        # 根据工作的业务员数量设置簇的状态
        if e_z == 0:
            g[z_cluster] = 0  # 无业务员工作
        elif e_z <= barM / num_cluster:
            g[z_cluster] = 1  # 工作业务员数量低于或等于平均值
        else:
            g[z_cluster] = 2  # 工作业务员数量高于平均值
    
    # 获取第一个业务员的工作量
    w = S[1][0][1]
    
    # 压缩状态的第一部分：计算每个簇中各等级的数量总和
    N = np.zeros((num_cluster, L_levels))  # 初始化N矩阵
    for z_cluster in range(num_cluster):
        for l in range(L_levels):
            # 对每个簇的每个等级，计算其数量总和，但不超过X
            N[z_cluster][l] = min(X, np.sum([S[0][i-1][l] for i in cluster[z_cluster]]))
            
    # 将计算出的N矩阵、簇的状态数组g和第一个业务员的工作量w组合成新的压缩状态barS
    barS = (N, g, w)        
    return barS

# 将城市列表平均分成Z个聚类
def split_list(I_citys, Z_cluster_num)->list:
    # 创建一个从1到I_citys的城市索引列表
    arr_city_idx = list(range(1, I_citys + 1))
    # 调用函数处理实际的分割
    return split_array_given_array(arr_city_idx, Z_cluster_num)



def split_array_given_array(arr_city_idx, Z_cluster_num)->list:
    """
    将输入数组分割成长度为 Z_cluster_num 的子数组列表。如果数组不能被 Z_cluster_num 整除，
    那么最后一个子数组将包含所有剩余的元素。
    
    参数:
    arr_city_idx (list): 需要被分割的输入数组。
    Z_cluster_num (int): 每个子数组的期望长度。
    
    返回值:
    list: 长度为 Z_cluster_num 的子数组列表，除了可能的最后一个子数组，它包含所有剩余的元素。
    """
    result = []  # 结果列表，用来存储所有的子数组
    quotient = len(arr_city_idx) // Z_cluster_num  # 计算整除的商，即完整子数组的数量
    remainder = len(arr_city_idx) % Z_cluster_num  # 计算余数，即最后一个子数组的元素数量
    
    # 划分可以整除的数组部分
    for i in range(quotient):
        sub_array = arr_city_idx[i * Z_cluster_num:(i + 1) * Z_cluster_num]  # 获取从 i*Z_cluster_num 到 (i+1)*Z_cluster_num 的子数组
        result.append(sub_array)  # 将子数组添加到结果列表中
    
    # 如果有余数，则处理剩余部分
    if remainder > 0:
        sub_array = arr_city_idx[-remainder:]  # 获取数组最后余数个元素形成的子数组
        result.append(sub_array)  # 将子数组添加到结果列表中
    
    return result  # 返回结果列表

def divide_reminder(num, divisor):
    """
    将一个整数除以另一个整数，并将结果向上取整。
    
    参数:
    num (int): 被除数。
    divisor (int): 除数。
    
    返回值:
    int: 向上取整后的商。
    """
    quotient = num // divisor  # 计算整除的商
    remainder = num % divisor  # 计算余数
    
    # 如果存在余数，则将商向上取整
    if remainder > 0:
        quotient += 1  # 余数大于0，商加一
    
    return quotient  # 返回向上取整后的商

In [6]:
Z_cluster_num=3
X=3
M_servers=40
I_citys=26
L_levels=5
barS=aggreg_state(state, Z_cluster_num, X, M_servers, I_citys, L_levels)
print(barS)

(array([[0., 3., 3., 3., 3.],
       [0., 1., 2., 2., 1.],
       [1., 1., 3., 3., 1.],
       [1., 0., 1., 3., 3.],
       [1., 3., 0., 1., 0.],
       [3., 0., 1., 1., 2.],
       [1., 0., 1., 3., 0.],
       [0., 1., 2., 2., 0.],
       [0., 0., 0., 2., 2.]]), array([2., 1., 1., 1., 2., 2., 2., 2., 2.]), 5)


In [7]:
# print(state)
print(barS)
S = state
# (位置i_m, 放假天数w_m)
mathscr_L = [server_restday[0] for server_restday in state[1]]
# print(f"{S=}\n {mathscr_L=}")

(array([[0., 3., 3., 3., 3.],
       [0., 1., 2., 2., 1.],
       [1., 1., 3., 3., 1.],
       [1., 0., 1., 3., 3.],
       [1., 3., 0., 1., 0.],
       [3., 0., 1., 1., 2.],
       [1., 0., 1., 3., 0.],
       [0., 1., 2., 2., 0.],
       [0., 0., 0., 2., 2.]]), array([2., 1., 1., 1., 2., 2., 2., 2., 2.]), 5)
S=(array([[0, 1, 1, 1, 1],
       [0, 3, 1, 3, 1],
       [0, 1, 1, 3, 2],
       [0, 0, 2, 0, 1],
       [0, 1, 0, 2, 0],
       [0, 0, 0, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 2, 1, 0],
       [0, 0, 1, 1, 1],
       [0, 0, 1, 1, 1],
       [1, 0, 0, 2, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0],
       [0, 2, 0, 1, 0],
       [1, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [2, 0, 1, 0, 2],
       [2, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0],
       [1, 0, 0, 3, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 2, 1, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1],
       [0, 0, 0, 1, 1]]), [(14, 5), (26, 1), (19, 1), (22, 2), (6, 4), (10, 4), (7, 2

In [8]:
def f_4(S, mathscr_L):
    # 假设 len(mathscr_L) = 7
    # 假设 S 和 mathscr_L 的数据结构如所描述
    # S 是一个二元组 (S0_tasks, S1_servers)，其中 S0_tasks 是任务矩阵
    # mathscr_L 是业务员的等级集合，例如 [l_1, l_2, ..., l_M]
    #S0 (I_citys x_max_task_num L_levels)
    # 首先计算 N_1 和 N_2
    L_levels = 5
    N_1 = [sum(1 for l_m, (i_m, w_m) in zip(mathscr_L, S[1]) if l_m == j and w_m != 0) for j in range(1, L_levels+1)]
    N_2 = [sum(S[0][i][j] for i in range(len(S[0]))) for j in range(L_levels)]
    # print(f"{N_1=} {N_2=}")
    # 初始化分类后的等级列表
    mathcal_L = []
    current_class = []
    total_N_1 = 0
    total_N_2 = 0

    for j in range(1, L_levels+1):
        total_N_1 += N_1[j-1]
        total_N_2 += N_2[j-1]
        current_class.append(j)

        if total_N_1 <= total_N_2:
            # 当 N_1 总和小于等于 N_2 总和时，终止当前类的添加
            mathcal_L.append(tuple(current_class))
            current_class = []
            total_N_1 = 0
            total_N_2 = 0

    if current_class:
        # 添加最后一个类
        mathcal_L.append(tuple(current_class))

    return mathcal_L, N_1, N_2


In [9]:
S = state
# (位置i_m, 放假天数w_m)
mathscr_L = [1,1,1,2,3,4,5,2,3,5, 1,1,1,2,3,4,5,2,3,5,1,1,1,2,3,4,5,2,3,5,1,1,1,2,3,4,5,2,3,5,]
# print(f"{S=}\n {mathscr_L=}")
mathcal_L, N_1, N_2 = f_4(S, mathscr_L)
print(mathcal_L, N_1, N_2)

[(1, 2), (3,), (4,), (5,)] [11, 7, 8, 3, 5] [9, 11, 13, 24, 13]


In [10]:
len(S[0]),len(S[1])

(26, 40)

In [11]:
import pulp

def func3_fix(S, L_mathscr, H_home_of_server, r1, c1, c2):
    """
    生成状态 S 到决策 A 的函数，通过解决线性规划问题来最大化收益 R(S, A)。

    参数:
    S (tuple): 当前状态，包含任务矩阵和服务员信息。
    L_mathscr (list): 服务员的等级列表。
    H_home_of_server (list): 服务员的家位置列表。
    r1 (list): 每个等级的收益列表。
    c1 (list of list): I×I 的成本矩阵。
    c2 (float): 常数成本。

    返回:
    list: 最优决策 A，包含每个服务员的位置和等级。
    """
    n_il, servers_info = S
    
    # n_il, servers_info = S
    # M_servers = len(servers_info)  # 服务员数量
    # I_citys = len(n_il)          # 城市数量
    # # L_max = len(r1) - 1    # 最大等级

    M_servers = len(servers_info)  # 服务员数量
    I_citys = len(n_il)          # 城市数量
    L_max = max(L_mathscr) # 最大等级
    # print(f"{M_servers=} {I_citys=} {L_max=} {L_mathscr=}")

    # 创建问题实例
    prob = pulp.LpProblem("Optimal_Server_Assignment", pulp.LpMaximize)

    # 定义决策变量 y_{mil} 为二元变量
    y = pulp.LpVariable.dicts("y", 
                              ((m, i, l) for m in range(M_servers) for i in range(I_citys) for l in range(L_max+1)), 
                              cat=pulp.LpBinary)

    # M_servers=40 
    # I_citys=26 
    # L_max=5
    # 目标函数
    # 假设 L_max 和 r1 已经定义
    print("L_max:", L_max)
    print("Length of r1:", len(r1))

    # 遍历 r1 的元素，从索引 0 到 L_max（包含 L_max）
    # try:
    #     for l in range(L_max + 1):  # 注意这里使用 L_max + 1
    #         print(f"r1[{l}] = {r1[l]}")  # 打印每个元素来验证访问是否成功
    # except IndexError as e:
    #     print(f"IndexError: Trying to access r1[{l}], but it's out of range. Length of r1 is {len(r1)}.")
    prob += pulp.lpSum(
        r1[l1] * y[m1, i1, l1] - c1[servers_info[m1][0]-1][i1] * y[m1, i1, l1]
                       for m1 in range(M_servers) for i1 in range(I_citys)
                       for l1 in range(0, L_max+1))\
                        - c2 * pulp.lpSum(n_il[i][l-1] - pulp.lpSum(y[m, i, l] for m in range(M_servers)) for i in range(I_citys)
                       for l in range(1, L_max+1))

    # 添加约束
    # 每个服务员只能分配到一个地点和等级
    for m in range(M_servers):
        prob += pulp.lpSum(y[m, i, l] for i in range(I_citys) for l in range(L_max+1)) == 1

    # 服务员不工作时，分配到家乡的等级0
    for m, (im, wm) in enumerate(servers_info):
        if wm == 0:
            prob += y[m, H_home_of_server[m], 0] == 1

    # 服务员工作时，必须分配到合适的等级和城市
    for m, (im, wm) in enumerate(servers_info):
        if wm > 0:
            prob += pulp.lpSum(y[m, i, l] for i in range(I_citys) for l in range(L_mathscr[m], L_max+1) if n_il[i][l-1] > 0) == 1

    # 资源使用不超过可用数量
    for i in range(I_citys):
        for l in range(1, L_max+1):
            prob += pulp.lpSum(y[m, i, l] for m in range(M_servers)) <= n_il[i][l-1]

    # 求解问题
    prob.solve()

    # 解析结果
    result = [(m, i, l) for m in range(M_servers) for i in range(I_citys) for l in range(L_max+1) if pulp.value(y[m, i, l]) == 1]
    return result


In [12]:
S = state
n_il, servers_info = S
L_mathscr = mathscr_L
H_home_of_server = H_home_of_server
r1  = [0, 3500, 3000, 2500, 2000, 1500]
# 假设 I_citys 是城市的数量，这个值应该根据你的具体情况来设置
I_citys = len(n_il)  # 以 n_il 变量中元素的数量来确定城市数量

# 生成随机的成本矩阵 c1
c1 = [[0 if i == j else random.randint(100, 500) for j in range(I_citys)] for i in range(I_citys)]
c2 = 100
A =func3_fix(S, L_mathscr, H_home_of_server, r1, c1, c2)

L_max: 5
Length of r1: 6


In [13]:
len(A), A

(40,
 [(0, 13, 2),
  (1, 20, 1),
  (2, 14, 1),
  (3, 0, 2),
  (4, 9, 3),
  (5, 9, 4),
  (6, 9, 5),
  (7, 4, 2),
  (8, 16, 3),
  (9, 0, 5),
  (10, 7, 1),
  (11, 17, 1),
  (12, 13, 2),
  (13, 2, 2),
  (14, 22, 3),
  (15, 21, 4),
  (16, 7, 0),
  (17, 8, 0),
  (18, 2, 3),
  (19, 2, 5),
  (20, 10, 1),
  (21, 16, 1),
  (22, 17, 1),
  (23, 1, 2),
  (24, 1, 3),
  (25, 25, 4),
  (26, 1, 5),
  (27, 14, 2),
  (28, 0, 3),
  (29, 24, 5),
  (30, 1, 0),
  (31, 15, 1),
  (32, 16, 1),
  (33, 22, 2),
  (34, 18, 3),
  (35, 6, 0),
  (36, 7, 0),
  (37, 6, 2),
  (38, 8, 3),
  (39, 10, 0)])

In [14]:
mathcal_L

[(1, 2), (3,), (4,), (5,)]

In [15]:
import pulp

def fun5(S, mathcal_L, N1, N2):
    n_il, servers_info = S
    M_servers = len(servers_info)  # 服务员数量
    I_citys = len(n_il)          # 城市数量
    L_max = max(mathcal_L)         # 最大等级

    # 步骤1：安排所有放假的员工回家
    C_h = sum(c1[servers_info[m][0]-1][H_home_of_server[m]] for m in range(M_servers) if servers_info[m][1] == 0)

    # 初始化最终的分配结果
    Y = [None] * M_servers

    # 步骤2：安排每个等级类中的员工
    for level_class in mathcal_L:
        M_servers_L = [m for m in range(M_servers) if mathcal_L[m] in level_class and servers_info[m][1] > 0]
        I_citys_L = [i for i in range(I_citys) if any(n_il[i][l-1] > 0 for l in level_class)]

        if N1[level_class[0]-1] <= N2[level_class[0]-1]:  # 等级类为 "≤"
            prob = pulp.LpProblem("Optimal_Server_Assignment_1", pulp.LpMaximize)
            y = pulp.LpVariable.dicts("y", ((m, i, l) for m in M_servers_L for i in I_citys_L for l in level_class), cat=pulp.LpBinary)

            prob += pulp.lpSum(r1[l] * y[m, i, l] - c1[servers_info[m][0]-1][i] * y[m, i, l]
                               for m in M_servers_L for i in I_citys_L for l in level_class) \
                    - c2 * pulp.lpSum(n_il[i][l-1] - pulp.lpSum(y[m, i, l] for m in M_servers_L) for i in I_citys_L for l in level_class)

            for m in M_servers_L:
                prob += pulp.lpSum(y[m, i, l] for i in I_citys_L for l in level_class) == 1

            for i in I_citys_L:
                for l in level_class:
                    prob += pulp.lpSum(y[m, i, l] for m in M_servers_L) <= n_il[i][l-1]

        else:  # 等级类为 ">"
            prob = pulp.LpProblem("Optimal_Server_Assignment_2", pulp.LpMaximize)
            y = pulp.LpVariable.dicts("y", ((m, i, l) for m in M_servers_L for i in I_citys_L for l in level_class), cat=pulp.LpBinary)

            prob += pulp.lpSum(r1[l] * y[m, i, l] - c1[servers_info[m][0]-1][i] * y[m, i, l]
                               for m in M_servers_L for i in I_citys_L for l in level_class)

            for m in M_servers_L:
                prob += pulp.lpSum(y[m, i, l] for i in I_citys_L for l in level_class) <= 1

            for i in I_citys_L:
                for l in level_class:
                    prob += pulp.lpSum(y[m, i, l] for m in M_servers_L) == n_il[i][l-1]

        prob.solve()

        for m in M_servers_L:
            for i in I_citys_L:
                for l in level_class:
                    if pulp.value(y[m, i, l]) == 1:
                        Y[m] = (i, l)

    # 步骤三：计算最终的目标函数值
    R_1 = sum(r1[l] * pulp.value(y[m, i, l]) - c1[servers_info[m][0]-1][i] * pulp.value(y[m, i, l])
              for level_class in mathcal_L if N1[level_class[0]-1] <= N2[level_class[0]-1]
              for m in [m for m in range(M_servers) if mathcal_L[m] in level_class and servers_info[m][1] > 0]
              for i in [i for i in range(I_citys) if any(n_il[i][l-1] > 0 for l in level_class)]
              for l in level_class)
    R_2 = sum(r1[l] * pulp.value(y[m, i, l]) - c1[servers_info[m][0]-1][i] * pulp.value(y[m, i, l])
              for level_class in mathcal_L if N1[level_class[0]-1] > N2[level_class[0]-1]
              for m in [m for m in range(M_servers) if mathcal_L[m] in level_class and servers_info[m][1] > 0]
              for i in [i for i in range(I_citys) if any(n_il[i][l-1] > 0 for l in level_class)]
              for l in level_class)
    R = R_1 + R_2 - C_h

    return Y, R

In [16]:
mathcal_L

[(1, 2), (3,), (4,), (5,)]

In [41]:
import pulp

def func5(S, mathcal_L, mathscr_L, N_1, N_2, H_home_of_server, r1, c1, c2):
    """
    生成状态 S 到决策 Y 的函数,通过解决线性规划问题来最大化收益 R(S, Y)。

    参数:
    S (tuple): 当前状态,包含任务矩阵和服务员信息。
    mathcal_L (list): 分类后的等级列表。
    mathscr_L (list): 所有服务员的等级列表
    N_1 (list): 每个等级的服务员数量。
    N_2 (list): 每个等级的任务数量。
    H_home_of_server (list): 服务员的家位置列表。
    r1 (list): 每个等级的收益列表。
    c1 (list of list): I×I 的成本矩阵。
    c2 (float): 常数成本。

    返回:
    list: 最优决策 Y,包含每个服务员的位置和等级。
    """
    n_il, servers_info = S
    M_servers = len(servers_info)  # 服务员数量
    I_citys = len(n_il)          # 城市数量
    L_max = [max(l) for l in mathcal_L]  # 最大等级
    print("M_servers, I_citys, L_max, N_1, N_2, mathcal_L, mathscr_L", M_servers, I_citys, L_max, N_1, N_2, mathcal_L, mathscr_L)
    # 步骤1:安排放假的员工回家
    C_h = sum(c1[servers_info[m][0]-1][H_home_of_server[m]] for m in range(M_servers) if servers_info[m][1] == 0)
    
    total_revenue = -C_h  # 初始化总收益为负的回家成本

    Y = [None] * M_servers  # 初始化最优决策 Y
    
    Y_set = []
    # 步骤2:对每个等级类独立进行员工分配
    for L_set, l_max_L in zip(mathcal_L, L_max):
        print("L_set ", L_set)
        M_servers_L = [m for m in range(M_servers) if servers_info[m][1] > 0 and mathscr_L[m] in L_set]  # 该等级类下工作的员工集合
        I_citys_L = [i for i in range(I_citys) if any(n_il[i][l-1] > 0 for l in L_set)]  # 该等级类下有任务需求的城市集合
        print("M_servers_L, I_citys_L", M_servers_L, I_citys_L)
        # 创建问题实例
        prob = pulp.LpProblem(f"Optimal_Server_Assignment_Level_{L_set}", pulp.LpMaximize)

        # 定义决策变量 y_{mil} 为二元变量
        y = pulp.LpVariable.dicts("y", 
                                  ((m, i, l) for m in M_servers_L for i in I_citys_L for l in L_set), 
                                  cat=pulp.LpBinary)

        # 目标函数
        if sum(N_1[l-1] for l in L_set) <= sum(N_2[l-1] for l in L_set):  # 等级类型为"≤"
            prob += pulp.lpSum(
                r1[l1-1] * y[m1, i1, l1] - c1[servers_info[m1][0]-1][i1] * y[m1, i1, l1]
                for m1 in M_servers_L for i1 in I_citys_L for l1 in L_set) \
                - c2 * pulp.lpSum(n_il[i][l-1] - pulp.lpSum(y[m, i, l] for m in M_servers_L) 
                                  for i in I_citys_L for l in L_set)
        else:  # 等级类型为">"
            prob += pulp.lpSum(
                r1[l1-1] * y[m1, i1, l1] - c1[servers_info[m1][0]-1][i1] * y[m1, i1, l1]
                for m1 in M_servers_L for i1 in I_citys_L for l1 in L_set)

        # 添加约束
        for m in M_servers_L: 
            # 每个工作中的服务员 m,要求其被分配到城市 i 提供的服务等级 l 必须不低于他自身的服务等级 L_mathscr[m]
            # 且只能被分配到一个城市提供一种等级的服务。
            prob += pulp.lpSum(y[m, i, l] for i in I_citys_L for l in L_set if l >= mathscr_L[m]) == 1

        for i in I_citys_L:
            for l in L_set:
                if sum(N_1[l-1] for l in L_set) <= sum(N_2[l-1] for l in L_set):  # 等级类型为"≤"
                    prob += pulp.lpSum(y[m, i, l] for m in M_servers_L) <= n_il[i][l-1]
                else:  # 等级类型为">"
                    prob += pulp.lpSum(y[m, i, l] for m in M_servers_L) == n_il[i][l-1]

        # 求解问题
        prob.solve()

        
        for m in M_servers_L:
            for i in I_citys_L:
                for l in L_set:
                    Y_sub_set = []
                    if pulp.value(y[m, i, l]) == 1:
                        Y[m] = (i+1, l)  # 城市编号从1开始
                        break

        # 更新总收益
        total_revenue += pulp.value(prob.objective)

        # 提取结果
        # Y_L = [(i, l) for m in M_servers_L for i in I_citys_L for l in L_set if y[m, i, l].value() == 1]
        # Y_set.append(Y_L)

    
    # 步骤3:安排放假的员工
    for m in range(M_servers):
        if servers_info[m][1] == 0:
            Y[m] = (H_home_of_server[m]+1, 0)  # 城市编号从1开始
    # # 步骤3:计算总收益
    # R = sum(prob.objective.value() for L in L_set) - C_h
    #     Y_set = []
        # 解析结果


    return Y, total_revenue

In [42]:
Y, total_revenue = func5(S,mathcal_L, mathscr_L, N_1, N_2, H_home_of_server, r1, c1, c2)

M_servers, I_citys, L_max, N_1, N_2, mathcal_L, mathscr_L 40 26 [2, 3, 4, 5] [11, 7, 8, 3, 5] [9, 11, 13, 24, 13] [(1, 2), (3,), (4,), (5,)] [1, 1, 1, 2, 3, 4, 5, 2, 3, 5, 1, 1, 1, 2, 3, 4, 5, 2, 3, 5, 1, 1, 1, 2, 3, 4, 5, 2, 3, 5, 1, 1, 1, 2, 3, 4, 5, 2, 3, 5]
L_set  (1, 2)
M_servers_L, I_citys_L [0, 1, 2, 3, 7, 10, 11, 12, 13, 20, 21, 22, 23, 27, 31, 32, 33, 37] [0, 1, 2, 4, 6, 7, 10, 13, 14, 15, 16, 17, 20, 22]
L_set  (3,)
M_servers_L, I_citys_L [4, 8, 14, 18, 24, 28, 34, 38] [0, 1, 2, 3, 7, 8, 9, 16, 18, 22]
L_set  (4,)
M_servers_L, I_citys_L [5, 15, 25] [0, 1, 2, 4, 6, 7, 8, 9, 10, 13, 17, 20, 21, 22, 24, 25]
L_set  (5,)
M_servers_L, I_citys_L [6, 9, 19, 26, 29] [0, 1, 2, 3, 8, 9, 10, 11, 16, 24, 25]




In [43]:
Y, len(Y), total_revenue

([(14, 2),
  (17, 1),
  (1, 2),
  (2, 2),
  (10, 3),
  (10, 4),
  (10, 5),
  (5, 2),
  (17, 3),
  (1, 5),
  (8, 1),
  (18, 1),
  (14, 2),
  (3, 2),
  (23, 3),
  (22, 4),
  (8, 0),
  (9, 0),
  (3, 3),
  (3, 5),
  (18, 1),
  (17, 1),
  (23, 2),
  (15, 2),
  (2, 3),
  (26, 4),
  (2, 5),
  (15, 1),
  (1, 3),
  (25, 5),
  (2, 0),
  (11, 1),
  (16, 1),
  (2, 2),
  (19, 3),
  (7, 0),
  (8, 0),
  (7, 2),
  (9, 3),
  (11, 0)],
 40,
 70807.0)

In [44]:
def func6(S, mathcal_L, mathscr_L, N_1, N_2):
    """
    生成状态 S 的决策空间 A,满足约束条件。

    参数:
    S (tuple): 当前状态,包含任务矩阵和服务员信息。
    mathcal_L (list): 分类后的等级列表。
    mathscr_L (list): 所有服务员的等级列表
    N_1 (list): 每个等级的服务员数量。
    N_2 (list): 每个等级的任务数量。

    返回:
    list: 决策空间 A,包含每个服务员的所有可能决策。
    """
    n_il, servers_info = S
    M_servers = len(servers_info)  # 服务员数量
    I_citys = len(n_il)          # 城市数量

    A = [[] for _ in range(M_servers)]  # 初始化决策空间 A

    # 对每个服务员生成可能的决策
    for m in range(M_servers):
        if servers_info[m][1] == 0:  # 服务员 m 放假
            A[m].append((servers_info[m][0], 0))  # 放假的服务员只有一个决策,即回家
        else:  # 服务员 m 工作
            for L_set in mathcal_L:  # 遍历每个等级类
                if mathscr_L[m] in L_set:  # 如果服务员 m 的等级属于当前等级类
                    for i in range(I_citys):  # 遍历每个城市
                        for l in L_set:  # 遍历当前等级类的每个等级
                            if l >= mathscr_L[m]:  # 如果当前等级不低于服务员 m 的等级
                                if (sum(N_1[l-1] for l in L_set) <= sum(N_2[l-1] for l in L_set) and 
                                    sum(1 for a in A[m] if a[0] == i+1) < sum(n_il[i][l-1] for l in L_set)) or \
                                   (sum(N_1[l-1] for l in L_set) > sum(N_2[l-1] for l in L_set) and
                                    sum(1 for a in A[m] if a[0] == i+1 and a[1] == l) < n_il[i][l-1]):
                                    # 如果满足约束条件,则将决策添加到服务员 m 的决策空间中
                                    A[m].append((i+1, l))  # 城市编号从1开始

    return A

In [45]:
A_all = func6(S, mathcal_L, mathscr_L, N_1, N_2)
len(A_all)

40

In [46]:
A_all

[[(1, 1),
  (2, 1),
  (2, 2),
  (3, 1),
  (5, 1),
  (7, 1),
  (8, 1),
  (11, 1),
  (14, 1),
  (14, 2),
  (15, 1),
  (15, 2),
  (16, 1),
  (17, 1),
  (17, 2),
  (18, 1),
  (18, 2),
  (21, 1),
  (23, 1)],
 [(1, 1),
  (2, 1),
  (2, 2),
  (3, 1),
  (5, 1),
  (7, 1),
  (8, 1),
  (11, 1),
  (14, 1),
  (14, 2),
  (15, 1),
  (15, 2),
  (16, 1),
  (17, 1),
  (17, 2),
  (18, 1),
  (18, 2),
  (21, 1),
  (23, 1)],
 [(1, 1),
  (2, 1),
  (2, 2),
  (3, 1),
  (5, 1),
  (7, 1),
  (8, 1),
  (11, 1),
  (14, 1),
  (14, 2),
  (15, 1),
  (15, 2),
  (16, 1),
  (17, 1),
  (17, 2),
  (18, 1),
  (18, 2),
  (21, 1),
  (23, 1)],
 [(1, 2),
  (2, 2),
  (3, 2),
  (5, 2),
  (7, 2),
  (8, 2),
  (11, 2),
  (14, 2),
  (15, 2),
  (16, 2),
  (17, 2),
  (18, 2),
  (21, 2),
  (23, 2)],
 [(1, 3),
  (2, 3),
  (3, 3),
  (4, 3),
  (8, 3),
  (9, 3),
  (10, 3),
  (17, 3),
  (19, 3),
  (23, 3)],
 [(1, 4),
  (2, 4),
  (3, 4),
  (5, 4),
  (7, 4),
  (8, 4),
  (9, 4),
  (10, 4),
  (11, 4),
  (14, 4),
  (18, 4),
  (21, 4),
  (22, 4),
 

In [26]:
import numpy as np

def f_7(T, x_max_task_num, lambda_il):
    # 生成了每日新到达的任务?
    # T: 表示时间周期，例如天数
    # x_max_task_num: 矩阵元素的最大取值
    # lambda_il: 泊松分布的率参数矩阵 (I_citys x_max_task_num L_levels)

    # 获取 lambda_il 的维度为 I_citys 和 L_levels
    I_citys, L_levels = lambda_il.shape

    # 初始化三维数组
    arriving_tasks_i = np.zeros((T, I_citys, L_levels), dtype=int)
    
    # 生成每个时间步的 I_citys x_max_task_num L_levels 矩阵
    for t in range(T):
        for i in range(I_citys):
            for l in range(L_levels):
                # 使用泊松分布生成矩阵元素
                arriving_tasks_i[t, i, l] = min(np.random.poisson(lambda_il[i, l]), x_max_task_num)
    
    return arriving_tasks_i



In [28]:
# 示例参数
T = 7  # 时间步数量
x_max_task_num = 3  # 最大值
I_citys = 40  # 城市数量
L_levels = 5  # 等级数量
lambda_il = np.random.rand(I_citys, L_levels)  # 生成率参数矩阵

# 生成arriving_tasks_i
arriving_tasks_i = f_7(T, x_max_task_num, lambda_il)
print(arriving_tasks_i.shape, arriving_tasks_i)

(7, 40, 5) [[[3 1 3 2 1]
  [1 0 0 1 1]
  [0 1 2 0 0]
  ...
  [2 0 1 0 0]
  [0 0 0 0 0]
  [1 0 1 0 0]]

 [[0 2 0 3 1]
  [1 0 1 1 0]
  [1 1 1 0 1]
  ...
  [0 0 0 1 0]
  [0 0 0 0 1]
  [0 1 0 1 0]]

 [[0 0 2 0 1]
  [0 2 1 1 1]
  [0 1 0 0 0]
  ...
  [0 0 1 0 0]
  [0 0 0 0 0]
  [0 0 0 1 0]]

 ...

 [[0 1 2 1 2]
  [1 0 0 2 1]
  [1 0 1 0 3]
  ...
  [0 0 0 1 2]
  [1 0 0 0 0]
  [1 0 0 0 0]]

 [[1 1 1 0 0]
  [2 0 0 2 0]
  [0 0 1 2 3]
  ...
  [0 0 1 0 0]
  [0 1 0 0 1]
  [0 0 1 2 0]]

 [[1 0 0 0 0]
  [0 0 0 0 0]
  [1 1 0 1 0]
  ...
  [0 0 0 0 0]
  [0 0 0 0 0]
  [0 0 0 2 1]]]


In [None]:
func1 = generate_state
func2 = aggreg_state
func3 = func3_fix
func4 = f_4
func5 = func5
# func6 = 
func7 = f_7

In [None]:
#函数8
import numpy as np



for j in range(J):
    s0 = func1(I_citys, L_levels, W_workdays, M_servers, x_max_task_num, H_home_of_server, lambd)
    xi = func7(T, x_max_task_num, lambda_il)
    R=[0]*T
    for t in range(T):
        action = func3(s0, L_mathscr, H_home_of_server, r1, c1, c2)
        R[t] = revenue(s0,action)
        s1 = state_trans(s0,action,xi[t])
        
        



def revenue(st,at):
    


def state_trans(S0,act,xi):   #状态转移
    dic1 = {}
    for i, row in enumerate(S0):
        for j, value in enumerate(row):
            dic1[(i+1, j+1)] = value
    dic2 = {(x[1], x[2]): 1 for x in act if x[2] != 0}
    S_A_cell = {}
    for key in dic1:
        if key in dic2:
            S_A_cell[key] = dic1[key] - dic2[key]
        else:
            S_A_cell[key] = dic1[key]
    S_A = [[0] *len(S0[0]) for _ in range(len(S0))]
    for key, value in S_A_cell.items():
        S_A[key[0]-1][key[1]-1] = value
        
    result = np.add(S_A,xi)
    return result

In [None]:
import numpy as np

def state_trans(S0,act,xi):
    dic1 = {}
    for i, row in enumerate(S0):
        for j, value in enumerate(row):
            dic1[(i+1, j+1)] = value
    dic2 = {(x[1], x[2]): 1 for x in act if x[2] != 0}
    S_A_cell = {}
    for key in dic1:
        if key in dic2:
            S_A_cell[key] = dic1[key] - dic2[key]
        else:
            S_A_cell[key] = dic1[key]
    S_A = [[0] *len(S0[0]) for _ in range(len(S0))]
    for key, value in S_A_cell.items():
        S_A[key[0]-1][key[1]-1] = value
        
    result = np.add(S_A,xi)
    return result

In [None]:
S=state[0]
act = A
xi = arriving_tasks_i[0]
tra = state_trans(S,act,xi)
print(tra)