# CPF-MAB主代码
- 让所有的客户端在一个组里面进行训练
- 获取每个客户端的`local_mean`数组
- 根据每个客户端的`local_mean`数组，使用K-Means算法进行分组，组数由肘部法则和公式确定
- 根据分组结果，重新分配客户端到不同的组里面
- 在每个组中分别训练客户端，直到达到总迭代次数`T`
- 获取每个组的regret数组，加在一起获得最终的regret

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

from bandits import *
from client import *
from server import *

### 设置所有需要的变量
- `T`：总轮数
- `T_EXPLORE`：分组前探索轮数
- `alpha`：用于训练的超参数
- `beta`：个性化倾向性参数
- `M`：客户端数量
- `K`：臂数量
- `means`：每个客户端每个臂的均值（客户端数量×臂数量）
- `fp(p)`：用于平衡全局和局部探索次数的函数

In [None]:
import os

# Experiment configuration for the whole notebook.
# T is the total number of rounds; T_EXPLORE of them are spent with every
# client in a single group before clustering.
T = int(1e7)
T_EXPLORE = int(1e5)
alpha = 0.2  # training hyperparameter forwarded to PFEDUCB
beta = -1  # sentinel: -1 means "not specified"; the cluster-count cell replaces it

# Alternative hand-crafted means (4 clients x 9 arms), kept for experiments:
# means = np.array([[1,   0,  0,  0,  0.9,    0.4,    0.35,   0.35,   0.5],
#                   [0,   1,  0,  0,  0.3,    0.9,    0.35,   0.3,    0.5],
#                   [0,   0,  1,  0,  0.35,   0.35,   0.9,    0.3,    0.5],
#                   [0,   0,  0,  1,  0.4,    0.3,    0.35,   0.9,    0.5]])
means = np.random.random((30, 5))
# means = np.load('means/optimal_means.npy')

# One row per client, one column per arm.
M, K = means.shape

# np.save does not create missing parent directories, so make sure the target
# folder exists before persisting the means used by this run.
means_path = os.path.join('means', 'latest_means.npy')
os.makedirs(os.path.dirname(means_path), exist_ok=True)
np.save(means_path, means)

print(f'means: {means}')
print(f'Number of clients: {M}. Number of arms: {K}')
def fp(p):
    """Return ``2**p * log(T)`` for phase index ``p``.

    ``T`` is read from module scope when the function is called. The notebook
    describes this function as the one that balances global and local
    exploration counts; it is passed to ``PFEDUCB`` as its first argument.
    """
    log_horizon = np.log(T)
    return 2**p * log_horizon

### 将所有的客户端放在一起，进行训练
- 范围为所有客户端，初始化参数为全0，进行`T_EXPLORE`轮训练
- 将这次的`regret`进行可视化

In [None]:
# Exploration phase: all M clients run together for T_EXPLORE rounds,
# starting from all-zero local / global / mixed mean arrays.
zero_shape = (M, K)
pfed_0 = PFEDUCB(
    fp,
    T_EXPLORE,
    means,
    alpha,
    # Three separate arrays on purpose: each keyword gets its own buffer.
    c_local_mean=np.zeros(zero_shape),
    c_global_mean=np.zeros(zero_shape),
    c_mixed_mean=np.zeros(zero_shape),
)

exploration_outputs = pfed_0.simulate()
mixed_regret_0, local_reward_0, global_reward_0, mixed_reward_0 = exploration_outputs

# Per-client state after exploration; reused for clustering and to seed the
# per-cluster simulations.
clients_state = pfed_0.get_clients_data()
c_local_mean, c_global_mean, c_mixed_mean = clients_state

In [None]:
# Plot the regret of the exploration phase.
# NOTE(review): a stop index of -1 leaves out the final sample of the curve.
end_round_of_plot = -1
fig = plt.figure(1, figsize=(9, 6))
ax = fig.gca()
ax.plot(mixed_regret_0[:end_round_of_plot], label=str(alpha))
ax.set_xlabel('$t$', fontsize=20)
ax.set_ylabel('Regret', fontsize=20)
ax.legend(fontsize=14)
ax.grid(alpha=0.5)  # grid-line transparency, unrelated to the `alpha` hyperparameter
plt.show()

### 根据local_mean对每个客户端进行分组
- 根据不同分组数量计算`SSE`
- 根据`肘部法则`和`二阶差分结果*组数`的数组确定最佳分组数量
- 根据递推公式和个性化倾向性确定最终分组数量
- 按照最终分组数量的设定，使用`K-Means`进行分组

In [None]:
# Fit K-Means on the clients' local means for every candidate cluster count
# 1..M and record the sum of squared errors (inertia) of each fit.
k_range = range(1, M + 1)
sse = []
for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=0)
    kmeans.fit(c_local_mean)
    sse.append(kmeans.inertia_)

# Elbow plot: SSE against the number of clusters.
elbow_ax = plt.figure(figsize=(8, 5)).gca()
elbow_ax.plot(k_range, sse, marker='o')
elbow_ax.set_xlabel('clusters')
elbow_ax.set_ylabel('SSE')
elbow_ax.set_title('Elbow Method for Optimal Clusters')
elbow_ax.grid(True)
plt.show()

# First and second finite differences of the SSE curve.
first_derivate = np.diff(sse)
second_derivative = np.diff(sse, n=2)

# Weight each second difference by its 1-based position and pick the largest.
# NOTE(review): second_derivative[i] is built from sse[i], sse[i + 1] and
# sse[i + 2], i.e. k = i + 1, i + 2, i + 3, while the selected count is
# argmax + 1 -- confirm this offset is the intended one.
position_weights = np.arange(1, len(second_derivative) + 1)
clusters_data = second_derivative * position_weights
opt_clusters = np.argmax(clusters_data) + 1
print("Optimal number of clusters:", opt_clusters)
print(f'First derivative: {first_derivate}')

In [None]:
# -1 is the "not specified" sentinel; fall back to the default beta.
if beta == -1:
    beta = 0.2

# Grow the cluster count from 1 towards opt_clusters, stopping as soon as the
# beta-scaled threshold no longer exceeds the current count.
nclusters = 1
while nclusters < opt_clusters:
    # min() is used because the first differences are expected to be negative,
    # so the minimum is the largest remaining drop in SSE.
    steepest_remaining = min(first_derivate[nclusters:])
    relative_drop = first_derivate[nclusters] / steepest_remaining
    threshold = beta * relative_drop * (opt_clusters) + 1
    # Written as `not (a > b)` so a NaN threshold stops the loop.
    if not threshold > nclusters:
        break
    nclusters += 1
print(f'Final number of clusters: {nclusters}, with beta value: {beta}, optimal clusters: {opt_clusters}')

In [None]:
# Cluster the clients by their local means using the final cluster count;
# labels[i] is the cluster id assigned to client i.
kmeans = KMeans(n_clusters=nclusters, random_state=0)
kmeans.fit(c_local_mean)
labels = kmeans.labels_
print(f'分组结果: {labels}')

### 根据分组情况按组训练
- 将`means`等数据按照分组进行划分，并存放在dict中
- 遍历每个组并训练模型`T - T_EXPLORE`轮

In [None]:
# Split the per-client data by cluster label. For each cluster id, store the
# member clients' true means, their original indices, and the local / global /
# mixed means carried over from the exploration phase.
init_data = {}
for cluster_id in range(nclusters):
    # Original client indices belonging to this cluster, in ascending order.
    members = [idx for idx in range(M) if labels[idx] == cluster_id]
    init_data[cluster_id] = {
        'means': np.array([means[idx] for idx in members]),
        'client_index': members,
        'local_mean': np.array([c_local_mean[idx] for idx in members]),
        'global_mean': np.array([c_global_mean[idx] for idx in members]),
        'mixed_mean': np.array([c_mixed_mean[idx] for idx in members]),
    }

In [None]:
# Run one PFEDUCB simulation per cluster for the remaining T - T_EXPLORE
# rounds, seeded with that cluster's means from the exploration phase.
pfed = []
result = {}
remaining_rounds = T - T_EXPLORE
for cluster in range(nclusters):
    print(f'Simulating cluster {cluster}')
    group = init_data[cluster]
    simulator = PFEDUCB(
        fp,
        remaining_rounds,
        group['means'],
        alpha,
        group['local_mean'],
        group['global_mean'],
        group['mixed_mean'],
    )
    pfed.append(simulator)
    mixed_regret, local_reward, global_reward, mixed_reward = simulator.simulate()
    result[cluster] = dict(
        mixed_regret=mixed_regret,
        local_reward=local_reward,
        global_reward=global_reward,
        mixed_reward=mixed_reward,
    )
    # Drop the loop-local names; the arrays stay reachable through `result`.
    del mixed_regret, local_reward, global_reward, mixed_reward

### 根据每组的运行结果汇总regret等信息
- 将每组的`mixed_regret`；`local_reward`；`global_reward`；`mixed_reward` 都加在一起
- 将`mixed_regret`进行可视化

In [None]:
# Sum each metric element-wise over all clusters, starting from zero arrays
# shaped like cluster 0's results.
# NOTE(review): assumes every cluster's arrays share cluster 0's shape and a
# compatible dtype -- confirm against PFEDUCB.simulate().
metric_names = ('mixed_regret', 'local_reward', 'global_reward', 'mixed_reward')
totals = {name: np.zeros_like(result[0][name]) for name in metric_names}
for cluster in range(nclusters):
    cluster_result = result[cluster]
    for name in metric_names:
        totals[name] += cluster_result[name]

mixed_regret = totals['mixed_regret']
local_reward = totals['local_reward']
global_reward = totals['global_reward']
mixed_reward = totals['mixed_reward']

# Offset the clustered-phase regret by the last exploration regret value, then
# append it to the exploration curve to get one curve over both phases.
mixed_regret = mixed_regret + mixed_regret_0[-1]
combined_mixed_regret = np.concatenate((mixed_regret_0, mixed_regret))

In [None]:
# Plot the combined regret curve (exploration phase followed by the
# per-cluster phase).
# NOTE(review): a stop index of -1 leaves out the final sample of the curve.
end_round_of_plot = -1
fig = plt.figure(1, figsize=(9, 6))
ax = fig.gca()
ax.plot(combined_mixed_regret[:end_round_of_plot], label=f'alpha={alpha}')
ax.set_xlabel('$t$', fontsize=20)
ax.set_ylabel('Regret', fontsize=20)
ax.legend(fontsize=14, loc='lower right')
ax.grid(alpha=0.5)  # grid-line transparency, unrelated to the `alpha` hyperparameter
plt.show()