In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tqdm

from bandits import *
from client import *
from server import *

Performance

In [None]:
# ---- Experiment configuration ----
T = 1_000_000  # total number of rounds
ENABLE_WARM_START = False  # warm start: when True, previously saved scores are reloaded from scores/*.npy
alpha_list = np.array([0, 0.2, 0.5, 0.9, 1])  # values of the alpha hyperparameter to evaluate

# NOTE(review): no random seed is set anywhere in this notebook; runs are only
# reproducible if bandits/client/server seed their own generators -- confirm.

# Alternative mean matrices kept for reference (currently disabled):
# means = np.array([[1,   0,  0,  0,  0.9,    0.4,    0.35,   0.35,   0.5],
#                   [0,   1,  0,  0,  0.3,    0.9,    0.35,   0.3,    0.5],
#                   [0,   0,  1,  0,  0.35,   0.35,   0.9,    0.3,    0.5],
#                   [0,   0,  0,  1,  0.4,    0.3,    0.35,   0.9,    0.5]])

# means = np.random.random((M, K))

# Mean matrix actually used: one row per client, one column per arm.
means = np.load('means.npy')
print(means)

M, K = means.shape[0], means.shape[1]  # M: number of clients, K: number of arms
n_simu = 5  # number of simulations run for each alpha

print(f'M = {M}, K = {K}')

def fp(p, horizon=None):
    """Return f(p) = 2^p * log(horizon).

    Parameters
    ----------
    p : int or float
        Exponent of the power of two (presumably the phase index used by
        PFEDUCB -- confirm in the bandits module).
    horizon : number, optional
        Value whose natural logarithm scales the result. When omitted, the
        notebook-level ``T`` is looked up at call time, so ``fp(p)`` behaves
        exactly as before for existing callers.

    Returns
    -------
    float
        ``2**p * np.log(horizon)``.
    """
    if horizon is None:
        # Fall back to the global horizon only when the caller did not pass one,
        # which keeps the function usable without relying on kernel state.
        horizon = T
    return 2**p * np.log(horizon)

[[0.96474458 0.09377086 0.85638765 0.55506951 0.20336919 0.32263036
  0.06530855 0.35435463 0.30264855]
 [0.6808939  0.42081861 0.74369719 0.01659674 0.99540671 0.78499021
  0.87432894 0.38923508 0.40590047]
 [0.99413358 0.44960078 0.93760862 0.53724053 0.35800793 0.55512393
  0.3811849  0.09878918 0.95534792]
 [0.94559346 0.38608158 0.31329797 0.35656277 0.16833863 0.63256776
  0.68181371 0.90621763 0.82996426]]
M = 4, K = 9


In [None]:
def load_scores(name, warm_start):
    """Return the score dictionary stored in ``scores/<name>.npy``, or ``{}``.

    Parameters
    ----------
    name : str
        Base name of the score file (without directory or ``.npy`` suffix).
    warm_start : bool
        When False nothing is read from disk and an empty dict is returned.

    Returns
    -------
    dict
        The dictionary saved in the file, or an empty dict when warm start is
        disabled or the file does not exist.
    """
    if not warm_start:
        return {}
    try:
        # SECURITY NOTE: allow_pickle=True unpickles the file content, which can
        # execute arbitrary code -- only load score files you produced yourself.
        return np.load(f'scores/{name}.npy', allow_pickle=True).item()
    except FileNotFoundError:
        # No earlier run saved this score: start from scratch.
        return {}


# One dictionary per score; with warm start enabled, try to resume from disk.
regret_reward = load_scores('regret_reward', ENABLE_WARM_START)
raw_regret = load_scores('raw_regret', ENABLE_WARM_START)
local_reward = load_scores('local_reward', ENABLE_WARM_START)
global_reward = load_scores('global_reward', ENABLE_WARM_START)
mixed_reward = load_scores('mixed_reward', ENABLE_WARM_START)

In [None]:
def summarize_runs(regrets, local_rewards, global_rewards, mixed_rewards):
    """Aggregate the per-run results element-wise across runs (axis 0).

    Returns a list laid out as
    ``[mean, std, min, max of regrets, mean of local_rewards,
    mean of global_rewards, mean of mixed_rewards]``.
    """
    return [np.mean(regrets, axis=0),
            np.std(regrets, axis=0),
            np.min(regrets, axis=0),
            np.max(regrets, axis=0),
            np.mean(local_rewards, axis=0),
            np.mean(global_rewards, axis=0),
            np.mean(mixed_rewards, axis=0)]


def save_scores(score_dicts, directory='scores'):
    """Save each ``{name: dict}`` entry to ``<directory>/<name>.npy``.

    The directory is created first: np.save does not create missing parent
    directories, and failing here would throw away a long simulation.
    """
    os.makedirs(directory, exist_ok=True)
    for name, scores in score_dicts.items():
        np.save(os.path.join(directory, f'{name}.npy'), scores)


# Defined up front so the interrupt handler can always report progress.
alpha = key = None
try:
    for alpha in alpha_list:
        key = str(round(alpha, 2))  # dictionary key shared by every score dict

        if key not in raw_regret:
            # case of algo not loaded (ie 0 run already done)
            raw_regret[key] = []
            local_reward[key] = []
            global_reward[key] = []
            mixed_reward[key] = []
        else:
            # A warm start may have found only some of the score files; make
            # sure the remaining logs exist so appending below cannot fail.
            local_reward.setdefault(key, [])
            global_reward.setdefault(key, [])
            mixed_reward.setdefault(key, [])

        # the number of runs still to run for this alpha
        nalgo = n_simu - len(raw_regret[key])
        if nalgo <= 0:
            print(f'Using existing data for alpha: {alpha}')
            if key not in regret_reward and raw_regret[key]:
                # Summary file was missing although the raw runs were loaded:
                # rebuild it so that later cells can still use this alpha.
                regret_reward[key] = summarize_runs(raw_regret[key], local_reward[key],
                                                    global_reward[key], mixed_reward[key])
            continue

        for i in tqdm.tqdm(range(nalgo), desc=f'Simulating P-Fed-UCB, alpha:{alpha}'):
            pfed = PFEDUCB(fp, T, means, alpha)  # simulate a run with the chosen parameters
            mixed_regret_1, local_reward_1, global_reward_1, mixed_reward_1 = pfed.simulate()
            raw_regret[key].append(mixed_regret_1)
            local_reward[key].append(local_reward_1)
            global_reward[key].append(global_reward_1)
            mixed_reward[key].append(mixed_reward_1)

            # Refresh the summary after every run so an interrupted session
            # still saves statistics covering all completed runs.
            regret_reward[key] = summarize_runs(raw_regret[key], local_reward[key],
                                                global_reward[key], mixed_reward[key])

            del mixed_regret_1, local_reward_1, global_reward_1, mixed_reward_1

except KeyboardInterrupt:
    # report progress; the partial results are saved just below
    print('alpha:', alpha, 'Number of tries ran: ', len(raw_regret.get(key, [])))

# Save the scores whether the loop completed or was interrupted by the user.
save_scores({'raw_regret': raw_regret,
             'local_reward': local_reward,
             'global_reward': global_reward,
             'mixed_reward': mixed_reward,
             'regret_reward': regret_reward})

# Drop the in-memory copies of the score dictionaries. Re-running this cell
# therefore requires re-running the cell that creates them first.
del raw_regret, local_reward, global_reward, mixed_reward, regret_reward

In [None]:
# Reload the summary statistics from disk rather than relying on kernel state.
try:
    # SECURITY NOTE: allow_pickle=True unpickles the file content -- only load
    # score files you produced yourself.
    regret_reward = np.load('scores/regret_reward.npy',allow_pickle=True).item()
except FileNotFoundError:
    regret_reward = {}
    print('scores/regret_reward.npy not found: run the simulation cell first.')

horizon_plot = None  # number of rounds to plot; None plots the full horizon
fig = plt.figure(1,figsize=(9,6))
n_curves = 0
for alpha in alpha_list:
    key = str(round(alpha,2))
    if key not in regret_reward:
        # Keep plotting the other alphas instead of failing with a KeyError.
        print(f'No saved results for alpha: {alpha}, skipping')
        continue
    # Entry 0 is the mean regret across runs, as written by the simulation cell.
    mean_regret = regret_reward[key][0]
    plt.plot(mean_regret[:horizon_plot], label=str(alpha))
    n_curves += 1
plt.xlabel('$t$', fontsize=20)
plt.ylabel('Regret', fontsize=20)
if n_curves:
    # A legend without any labelled curve only triggers a matplotlib warning.
    plt.legend(fontsize=14)
plt.grid(alpha=0.5)