In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

from bandits import *
from client import *
from server import *

# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
# T is the overall horizon: the first T_EXPLORE rounds are simulated with all
# clients together, the remaining T - T_EXPLORE rounds are simulated per cluster.
T = int(1e6)
T_EXPLORE = int(1e5)

# Alternative, hand-crafted mean matrices (three groups of clients, one row per
# client, one column per arm).  Uncomment these and the `np.vstack` line below
# to use them instead of the random means.
# group1 = np.array([
#     [0.8, 0.7, 0.2, 0.0, 0.0],
#     [0.75, 0.65, 0.25, 0.0, 0.0],
#     [0.78, 0.68, 0.22, 0.0, 0.0],
#     [0.76, 0.66, 0.24, 0.0, 0.0],
#     [0.77, 0.67, 0.23, 0.0, 0.0],
#     [0.79, 0.69, 0.21, 0.0, 0.0],
#     [0.74, 0.64, 0.26, 0.0, 0.0]
# ])

# group2 = np.array([
#     [0.0, 0.0, 0.8, 0.7, 0.0],
#     [0.0, 0.0, 0.75, 0.65, 0.0],
#     [0.0, 0.0, 0.78, 0.68, 0.0],
#     [0.0, 0.0, 0.76, 0.66, 0.0],
#     [0.0, 0.0, 0.77, 0.67, 0.0],
#     [0.0, 0.0, 0.79, 0.69, 0.0],
#     [0.0, 0.0, 0.74, 0.64, 0.0]
# ])

# group3 = np.array([
#     [0.0, 0.0, 0.0, 0.8, 0.7],
#     [0.0, 0.0, 0.0, 0.75, 0.65],
#     [0.0, 0.0, 0.0, 0.78, 0.68],
#     [0.0, 0.0, 0.0, 0.76, 0.66],
#     [0.0, 0.0, 0.0, 0.77, 0.67],
#     [0.0, 0.0, 0.0, 0.79, 0.69]
# ])
#
# means = np.vstack((group1, group2, group3))

# Default: 30 clients x 5 arms with means drawn uniformly from [0, 1).
means = np.random.random((30, 5))
# To replay a previously saved configuration instead:
# means = np.load('means/optimal_means.npy')

alpha = 0.9
# alpha_list = [0.2, 0.5, 0.9]
# beta = 0.9
# One full experiment is run for every beta in this list.
beta_list = [0.2, 0.6, 1.0]

M = means.shape[0]  # number of clients (rows)
K = means.shape[1]  # number of arms (columns)

# Persist the means used for this run so it can be reproduced later.
# np.save does not create missing parent directories, so make sure the
# target directory exists; otherwise a fresh checkout fails here.
os.makedirs('means', exist_ok=True)
np.save('means/latest_means.npy', means)
print(f'means: {means}')
print(f'Number of clients: {M}. Number of arms: {K}')
def fp(p):
    """Return ``2**p * log(T)`` using the notebook-level horizon ``T``.

    This callable is handed to ``PFEDUCB`` as its first argument; ``p`` is
    whatever index that class supplies (presumably a phase number - confirm
    against the PFEDUCB implementation).
    """
    scale = 2 ** p
    return scale * np.log(T)

# Maps each beta value to its combined regret curve; filled by the simulation cell.
regret = dict()

In [None]:
# Run the full two-phase experiment once per beta value:
#   1. simulate T_EXPLORE rounds with all M clients in one PFEDUCB instance;
#   2. cluster the clients with K-means on the local means returned by phase 1,
#      choosing the number of clusters from an elbow heuristic tempered by beta;
#   3. simulate the remaining T - T_EXPLORE rounds separately for each cluster;
#   4. sum the per-cluster regret curves, append them to the phase-1 curve and
#      store the result in regret[beta].
for beta in beta_list:
    # ---- Phase 1: all clients together, estimates initialised to zero ------
    pfed_0 = PFEDUCB(fp,
                    T_EXPLORE,
                    means,
                    alpha,
                    c_local_mean=np.zeros((M, K)),
                    c_global_mean=np.zeros((M, K)),
                    c_mixed_mean=np.zeros((M, K)))
    # Only mixed_regret_0 is used further down; the three reward outputs are kept
    # as notebook variables but not read again in this cell.
    mixed_regret_0, local_reward_0, global_reward_0, mixed_reward_0 = pfed_0.simulate()
    # Per-client estimates after phase 1 (presumably one row per client, one
    # column per arm - confirm against PFEDUCB.get_clients_data).
    c_local_mean, c_global_mean, c_mixed_mean = pfed_0.get_clients_data()

    # ---- Elbow analysis: K-means inertia for every k from 1 to M -----------
    # sse[i] holds the inertia obtained with k = i + 1 clusters.
    sse = []
    k_range = range(1, M + 1)
    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=0).fit(c_local_mean)
        sse.append(kmeans.inertia_)

    # first_derivate[i]    = sse[i + 1] - sse[i]  (change from k = i + 1 to k = i + 2)
    # second_derivative[j] = first_derivate[j + 1] - first_derivate[j]
    first_derivate = np.diff(sse)
    second_derivative = np.diff(first_derivate)
    # Weight the curvature by its (1-based) position so later bends count more.
    clusters_data = second_derivative * (np.arange(len(second_derivative)) + 1)
    # NOTE(review): second_derivative[j] is centred on k = j + 2, whereas this maps
    # index j to j + 1 clusters - confirm the offset is intended.
    opt_clusters = np.argmax(clusters_data) + 1
    print("Optimal number of clusters:", opt_clusters)
    print(f'First derivative: {first_derivate}')

    # ---- Choose the final cluster count between 1 and opt_clusters ---------
    # Starting from a single cluster, keep adding clusters while the beta-scaled
    # score below exceeds the current count; never go past opt_clusters.
    nclusters = 1
    while True:
        if nclusters == opt_clusters:
            break
        # first_derivate[nclusters] is the inertia change between k = nclusters + 1
        # and k = nclusters + 2 (NOTE(review): confirm this is the intended step).
        if beta * (first_derivate[nclusters] / min(first_derivate[nclusters:])) * (opt_clusters) + 1 > nclusters:
            # The inertia differences are expected to be negative (inertia shrinks
            # as k grows), so min() selects the steepest remaining drop and the
            # ratio above is positive.
            nclusters += 1
        else:
            break
    print(f'Final number of clusters: {nclusters}, with beta value: {beta}, optimal clusters: {opt_clusters}')

    # Final clustering of the clients with the chosen cluster count.
    kmeans = KMeans(n_clusters=nclusters, random_state=0).fit(c_local_mean)
    labels = kmeans.labels_
    # The printed prefix is Chinese for "grouping result".
    print(f'分组结果: {labels}')

    # ---- Split the true means and the phase-1 estimates by cluster ---------
    init_data = {}
    for i in range(nclusters):
        init_data[i] = {
            'means': [],
            'client_index': [],
            'local_mean': [],
            'global_mean': [],
            'mixed_mean': []
        }
    # Route every client's rows to the bucket of the cluster it was assigned to.
    for i in range(M):
        init_data[labels[i]]['means'].append(means[i])
        init_data[labels[i]]['client_index'].append(i)
        init_data[labels[i]]['local_mean'].append(c_local_mean[i])
        init_data[labels[i]]['global_mean'].append(c_global_mean[i])
        init_data[labels[i]]['mixed_mean'].append(c_mixed_mean[i])
    # Convert the collected rows to arrays ('client_index' stays a plain list).
    for i in range(nclusters):
        init_data[i]['means'] = np.array(init_data[i]['means'])
        init_data[i]['local_mean'] = np.array(init_data[i]['local_mean'])
        init_data[i]['global_mean'] = np.array(init_data[i]['global_mean'])
        init_data[i]['mixed_mean'] = np.array(init_data[i]['mixed_mean'])

    # ---- Phase 2: one simulation per cluster for the remaining rounds ------
    pfed = []
    result = {}
    for cluster in range(nclusters):
        print(f'Simulating cluster {cluster}')
        # Each cluster's simulation starts from that cluster's phase-1 estimates.
        pfed.append(PFEDUCB(fp,
                            T - T_EXPLORE,
                            init_data[cluster]['means'],
                            alpha,
                            init_data[cluster]['local_mean'],
                            init_data[cluster]['global_mean'],
                            init_data[cluster]['mixed_mean']))
        mixed_regret, local_reward, global_reward, mixed_reward = pfed[-1].simulate()
        result[cluster] = {
            'mixed_regret': mixed_regret,
            'local_reward': local_reward,
            'global_reward': global_reward,
            'mixed_reward': mixed_reward
        }
        # Drop the temporaries; the values live on in result[cluster].
        del mixed_regret, local_reward, global_reward, mixed_reward

    # ---- Aggregate the per-cluster outputs ---------------------------------
    # Accumulators take the shape and dtype of cluster 0's arrays; this assumes
    # every cluster returns arrays of the same shape - TODO confirm.
    mixed_regret = np.zeros_like(result[0]['mixed_regret'])
    local_reward = np.zeros_like(result[0]['local_reward'])
    global_reward = np.zeros_like(result[0]['global_reward'])
    mixed_reward = np.zeros_like(result[0]['mixed_reward'])
    for cluster in range(nclusters):
        mixed_regret += result[cluster]['mixed_regret']
        local_reward += result[cluster]['local_reward']
        global_reward += result[cluster]['global_reward']
        mixed_reward += result[cluster]['mixed_reward']
    # Offset the phase-2 curve by the last phase-1 value so that the concatenated
    # curve continues from where phase 1 ended (presumably cumulative regret).
    mixed_regret = mixed_regret + mixed_regret_0[-1]
    combined_mixed_regret = np.concatenate((mixed_regret_0, mixed_regret))
    regret[beta] = combined_mixed_regret
    print(f'beta: {beta} finished.')

In [None]:
# Plot the combined regret curve of every beta that was simulated.
# Slice end applied to each curve; -1 leaves out the final recorded point.
end_round_of_plot = -1
fig = plt.figure(1,figsize=(9,6))
# Iterate over the keys actually stored in `regret` instead of repeating the
# beta values literally: a hard-coded list raises KeyError (or silently omits
# curves) as soon as `beta_list` is edited in the configuration cell.
for beta in regret:
    plt.plot(regret[beta][:end_round_of_plot], label=f'beta={beta}')
plt.xlabel('$t$', fontsize=20)
plt.ylabel('Regret', fontsize=20)
plt.legend(fontsize=14, loc='lower right')
plt.grid(alpha=0.5)
plt.show()