In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.linalg import block_diag
from numpy.linalg import multi_dot
from scipy.linalg import sqrtm
from scipy.sparse import csgraph
from scipy import linalg
import os

In [2]:
N_TRIAL = 30000
N_ARMS = 100 #N_ARMS -> number of clients
N_FEATURE = 3
M = 29
K = 3

In [3]:
# X --> TBD now using x_{it} = [1,t,t^2] for all client i
Y_1 = np.genfromtxt('Y(noise0.1).csv',delimiter=',')
W = np.genfromtxt('W(noise0.1).csv',delimiter=',')
Beta = np.genfromtxt('Beta(noise0.1).csv',delimiter=',')
q = np.genfromtxt('q_init4.csv',delimiter=',')
c = np.genfromtxt('c_init4.csv',delimiter=',')

In [4]:
def make_regret(payoff, oracle):
    return np.cumsum(oracle - payoff)

def plot_regrets(results, oracle):
    [plt.plot(make_regret(payoff=x['r_payoff'], oracle=oracle), label="alpha: "+str(alpha)) for (alpha, x) in results.items()]

In [5]:
# X transformation from a sparse matrix
def X_reshape(X, X_tr, t, K, n_arms, n_feature):  #
  for arm in range(1, n_arms):
    X_tr = np.concatenate((X_tr,np.kron(np.identity(n = K),X[arm].reshape(-1,1))), axis = 1)
  return X_tr

# convert to a sparse matrix -> convert to a long sparse vector with flatten()
def X_to_X_m(X, t, arm_choice, n_arms, n_feature): 
  X_m = np.copy(X[t])
  for arm in np.arange(n_arms): # N x p
    if arm not in arm_choice:
      X_m[arm] = np.zeros(shape=n_feature)
  return X_m

In [6]:
# Create the F matrix
def constructAdjMatrix(W, n, threshold): #m
    Adj_mat = np.zeros(shape = (n, n))
    for ui in range(n):
        for uj in range(n):
            Adj_mat[ui][uj] = W[ui][uj]
        # trim the graph
            for i in range(n):
                if W[ui][i] <= threshold:
                    Adj_mat[ui][i] = 0;
#         Adj_mat[ui] /= sum(Adj_mat[ui])
    return Adj_mat
def constructLaplacianMatrix(W, n, Gepsilon):
    G = W.copy()
    #Convert adjacency matrix of weighted graph to adjacency matrix of unweighted graph
    for i in range(n):
        for j in range(n):
            if G[i][j] > 0:
                G[i][j] = 1
    L = csgraph.laplacian(G, normed = False)
    I = np.identity(n = G.shape[0])
    GW = I + Gepsilon*L  # W is a double stochastic matrix
    return GW.T

In [7]:
# Create the F matrix: (lda = 0 for CL-UCB wo)
lda = 0
threshold = 0.5
test_adj = constructAdjMatrix(W, N_ARMS, threshold)
test_F = constructLaplacianMatrix(test_adj, N_ARMS, lda)
FInv_Init = sqrtm(np.linalg.inv(np.kron(test_F, np.identity(n=K))))

In [8]:
#upload/download trigger
#UPLOAD
def upload(gammaU, IDclient, A_loc, A_up_buff):
    numerator = linalg.det(A_loc[IDclient])
    denominator = linalg.det(A_loc[IDclient] - A_up_buff[IDclient])
    if denominator == 0:
        return True
    else:
        check = numerator/denominator
        return check > gammaU

#DOWNLOAD
def download(gammaD, IDclient, A_gob, A_down_buff):
    numerator = linalg.det(A_gob)
    denominator = linalg.det(A_gob - A_down_buff[IDclient])
    #print(numerator/denominator)
    if denominator == 0:
        return True
    else:
        check = numerator/denominator
        return check > gammaD

def event_trigger(totalCommCost, IDclient, gammaU, gammaD, A_loc, A_up_buff, A_gob, A_down_buff, b_loc, b_up_buff, b_gob, b_down_buff, D_loc, D_up_buff, D_gob, \
                          D_down_buff, d_loc, d_up_buff, d_gob, d_down_buff, n_clients, n_feature, K):
    # check upload triggering event for A
    if upload(gammaU, IDclient, A_loc, A_up_buff):
        totalCommCost += 1
        # update server's statistics
        A_gob += A_up_buff[IDclient]
        b_gob += b_up_buff[IDclient]
                
        # update server's download buffer for other clients
        for clientID in np.arange(n_clients):
            if clientID != IDclient:
                A_down_buff[clientID] += A_up_buff[IDclient]
                b_down_buff[clientID] += b_up_buff[IDclient]
                        
        # clear client's upload buffer
        A_up_buff[IDclient] = np.zeros((K * n_feature, K * n_feature))
        b_up_buff[IDclient] = np.zeros(shape=K * n_feature)
        
        # print(A_up_buff[IDclient])
        # print((linalg.det(A_gob))/(linalg.det(A_gob-A_up_buff[IDclient])))
        # print(linalg.det(A_gob))
        # print(linalg.det(A_gob-A_down_buff[IDclient]))
        # check download triggering event for all clients
        for cli in np.arange(n_clients):
            if download(gammaD, cli, A_gob, A_down_buff):
                # print('here down')
                totalCommCost += 1
                
                # update client's local statistics, and clear server's download buffer
                A_loc[cli] += A_down_buff[cli]
                b_loc[cli] += b_down_buff[cli]
                
                # clear cserver's download buffer
                A_down_buff[cli] = np.zeros((K * n_feature, K * n_feature))
                b_down_buff[cli] = np.zeros(shape=K * n_feature)
                
    # check upload triggering event for D
    if upload(gammaU, IDclient, D_loc, D_up_buff):
        totalCommCost += 1
        # update server's statistics
        D_gob += D_up_buff[IDclient]
        d_gob += d_up_buff[IDclient]
                
        # update server's download buffer for other clients
        for clientID in np.arange(n_clients):
            if clientID != IDclient:
                D_down_buff[clientID] += D_up_buff[IDclient]
                d_down_buff[clientID] += d_up_buff[IDclient]
                        
        # clear client's upload buffer
        D_up_buff[IDclient] = np.zeros((n_clients * K, n_clients * K))
        d_up_buff[IDclient] = np.zeros(shape=n_clients * K)

        # check download triggering event for all clients
        for cli in np.arange(n_clients):
            if download(gammaD, cli, D_gob, D_down_buff):
                totalCommCost += 1
                
                # update client's local statistics, and clear server's download buffer
                D_loc[cli] += D_down_buff[cli]
                d_loc[cli] += d_down_buff[cli]
                
                # clear cserver's download buffer
                D_down_buff[cli] = np.zeros((n_clients * K, n_clients * K))
                d_down_buff[cli] = np.zeros(shape=n_clients * K)
    return totalCommCost, A_loc, A_up_buff, A_gob, A_down_buff, b_loc, b_up_buff, b_gob, b_down_buff, D_loc, D_up_buff, D_gob, D_down_buff, d_loc, d_up_buff, d_gob, d_down_buff

In [9]:
def Fed_CLUCB(eta_1, eta_2, alpha_q, alpha_c, X, Y, init_q, init_c, m, K, FInv, X_to_X_m, X_reshape, oracle, gammaU, gammaD):
    n_trial, n_clients, n_feature = X.shape

    # 1.1. Output objects
    totalCommCost = 0
    client_choice = np.empty(shape=(n_trial, m), dtype=int)
    r_payoff = np.empty(n_trial)   
    c_payoff = np.empty(n_trial) 
    cum_regret = np.empty(n_trial)
    p = np.empty(shape=(n_trial, n_clients))
    cum_totalCommCost = np.empty(n_trial)
    
    # te_q = np.empty(shape = (n_trial + 1, K * n_feature)) #Kp x 1
    # te_C_tilde = np.empty(shape = (n_trial + 1, n_arms * K)) #NK x 1
    
    # 1.2. Intialize local statistics
    A_loc = np.array([eta_1 * np.identity(n=K * n_feature) for _ in np.arange(n_clients)])
    A_up_buff = np.array([np.zeros((K * n_feature, K * n_feature)) for _ in np.arange(n_clients)])
    b_loc = np.array([np.zeros(shape=K * n_feature)  for _ in np.arange(n_clients)])
    b_up_buff = np.array([np.zeros(shape=K * n_feature)  for _ in np.arange(n_clients)])
    q_loc = np.empty(shape = (n_trial + 1, n_clients, K * n_feature)) #Kp x 1
    
    D_loc = np.array([eta_2 * np.identity(n=n_clients * K) for _ in np.arange(n_clients)])
    D_up_buff = np.array([np.zeros((n_clients * K, n_clients * K)) for _ in np.arange(n_clients)])
    d_loc = np.array([np.zeros(shape=n_clients * K)  for _ in np.arange(n_clients)])
    d_up_buff = np.array([np.zeros(shape=n_clients * K)  for _ in np.arange(n_clients)])
    c_loc = np.empty(shape = (n_trial + 1, n_clients, n_clients * K)) #NK x 1
    
    #add initialization for each client
    for b in np.arange(n_clients): 
        q_loc[0, b] = init_q
        c_loc[0, b] = init_c
    
    # 1.3 Global statistics
    A_gob = eta_1 * np.identity(n=K * n_feature) 
    A_down_buff = np.array([np.zeros((K * n_feature, K * n_feature)) for _ in np.arange(n_clients)])  
    b_gob = np.zeros(shape=K * n_feature)
    b_down_buff = np.array([np.zeros(shape=K * n_feature)  for _ in np.arange(n_clients)])
    
    D_gob = eta_2 * np.identity(n=n_clients * K) 
    D_down_buff = np.array([np.zeros((n_clients * K, n_clients * K)) for _ in np.arange(n_clients)])  
    d_gob = np.zeros(shape=n_clients * K)
    d_down_buff = np.array([np.zeros(shape=n_clients * K)  for _ in np.arange(n_clients)])
    
    
    # 2. Algorithm
    for t in np.arange(n_trial):
        for a in np.arange(n_clients):
            #Calculate inv(A_loc[a]), inv(D_loc[a]), q_loc[t,a], c_loc[t,a]
            inv_A = np.linalg.inv(A_loc[a])
            inv_D = np.linalg.inv(D_loc[a])
            if t != 0:
                q_loc[t, a] = inv_A.dot(b_loc[a])
                c_loc[t, a] = inv_D.dot(d_loc[a])
            #X Transformation for q case 
            X_temp = X_to_X_m(X, t, [a], n_clients, n_feature)    
            X_tr_init = np.kron(np.identity(n = K),X_temp[0].reshape(-1,1))
            X_tr = X_reshape(X_temp, X_tr_init, t, K, n_clients, n_feature) #Kp x NK 
            
            #X Transformation for c case
            X_tilde = FInv.dot(X_to_X_m(X, t, [a], n_clients, n_feature).flatten()) #Np x 1
            
            #Calculate cb_q and cb_c
            #cb_q  
            X_q_a = X_tr.dot(FInv.dot(c_loc[t, a]))
            cb_q = alpha_q * np.sqrt(X_q_a.T.dot(inv_A).dot(X_q_a))
            
            #cb_c
            # q_block = (block_diag(*[q_loc[t, a].reshape((K, n_feature)) for _ in np.arange(n_clients)])).T #Np x NK
            q_block = np.kron(np.eye(n_clients,dtype=int),q_loc[t, a].reshape((K,n_feature)).T)
            X_c = q_block.T.dot(X_tilde)
            cb_c = alpha_c * np.sqrt((X_c).T.dot(inv_D).dot(X_c))
            
            #Predictions
            p[t, a] = (FInv.dot(c_loc[t, a]).T).dot(X_tr.T).dot(q_loc[t, a]) + cb_q + cb_c
            
        # The central server chooses m best clients
        # idx = np.argpartition(p[t], -m)[-m:]
        # chosen_clients = idx[np.argsort(-(p[t])[idx])]
        # for i in np.arange(m):
        #     client_choice[t][i] = chosen_clients[i]
        chosen_clients = p[t].argsort()[-m:][::-1]
        for i in np.arange(m):
            client_choice[t][i] = chosen_clients[i]
                
            # Update local statistics based on following conditions
        for chosen_client in client_choice[t]:
            
            # client local statistics update
            
            X_tr_chosen_temp = X_to_X_m(X, t, [chosen_client], n_clients, n_feature)
            X_tr_init_cs = np.kron(np.identity(n = K),X_tr_chosen_temp[0].reshape(-1,1))
            X_1_tr_chosen =  X_reshape(X_tr_chosen_temp, X_tr_init_cs, t, K, n_clients, n_feature)
            X_tilde_chosen = FInv.dot(X_to_X_m(X, t, [chosen_client], n_clients, n_feature).flatten())
            #q_block_chosen = (block_diag(*[q_loc[t, chosen_client].reshape((K, n_feature)) for _ in np.arange(n_clients)])).T
            q_block_chosen = np.kron(np.eye(n_clients,dtype=int),q_loc[t, chosen_client].reshape((K,n_feature)).T)
            X_q = FInv.dot(c_loc[t, chosen_client]).dot(X_1_tr_chosen.T)
            X_C_Tilde = q_block_chosen.T.dot(X_tilde_chosen)
            
            A_loc[chosen_client] = A_loc[chosen_client] + np.outer(X_q, X_q)
            A_up_buff[chosen_client] = A_up_buff[chosen_client] + np.outer(X_q, X_q)
            b_loc[chosen_client] = b_loc[chosen_client] + Y[t, chosen_client] * X_q
            b_up_buff[chosen_client] = b_up_buff[chosen_client] + Y[t, chosen_client] * X_q
            
            D_loc[chosen_client] = D_loc[chosen_client] + np.outer(X_C_Tilde, X_C_Tilde)
            D_up_buff[chosen_client] = D_up_buff[chosen_client] + np.outer(X_C_Tilde, X_C_Tilde)
            d_loc[chosen_client] = d_loc[chosen_client] + Y[t, chosen_client] * X_C_Tilde
            d_up_buff[chosen_client] = d_up_buff[chosen_client] + Y[t, chosen_client] * X_C_Tilde
            
            # check upload triggering event for each local statistics A, D
            totalCommCost, A_loc, A_up_buff, A_gob, A_down_buff, b_loc, b_up_buff, b_gob, b_down_buff, \
            D_loc, D_up_buff, D_gob, D_down_buff, d_loc, d_up_buff, d_gob, d_down_buff = \
            event_trigger(totalCommCost, chosen_client, gammaU, gammaD, A_loc, A_up_buff, A_gob, \
                          A_down_buff, b_loc, b_up_buff, b_gob, b_down_buff, D_loc, D_up_buff, D_gob, \
                          D_down_buff, d_loc, d_up_buff, d_gob, d_down_buff,n_clients, n_feature, K)
            
            #else: if do not pass the upload, then the statistics are still the same in local
               
        #else: for other clients not selected at round t, the statistics are still the same in local
        
                
        # Cumulative regret
        r_payoff[t] = np.sum([Y[t, choice] for choice in client_choice[t]])      
        cum_regret[t] = np.sum(oracle[0:t+1] - r_payoff[0:t+1])
        cum_totalCommCost[t] = totalCommCost
        if (t+1) % 1000 == 0:
            print('TRIAL:',t,'DONE', '| cum_regret:', cum_regret[t])
            print('Total Communication cost:', totalCommCost)
        # print(cum_regret[t], totalCommCost)
        
    return dict(A_gob=A_gob, b_gob=b_gob, D_gob=D_gob, d_gob=d_gob, q_loc=q_loc, c_loc = c_loc, p = p, client_choice = client_choice, r_payoff = r_payoff, totalCommCost=totalCommCost, cum_totalCommCost=cum_totalCommCost)

In [10]:
# Create X_i = [1, t, t^2]
X_1_lst = []
for T in np.arange(N_TRIAL):
  X_1t_lst = []
  for arm in np.arange(N_ARMS):
    temp = []
    temp.append(1)
    temp.append(0.001*(T+1))
    temp.append((0.001*(T+1))**2)
    X_1t_lst.append(np.array(temp))
  X_1_lst.append(np.array(X_1t_lst))
X_1 = np.array(X_1_lst)

In [11]:
oracle_lst = []
true_choice = []
new_y = -1 * Y_1 + 30 #
for t in np.arange(N_TRIAL):
  # Find indices of M highest arms
  all_reward_t = [new_y.T[t, arm] for arm in np.arange(N_ARMS)]
  chosen_arms = np.array(all_reward_t).argsort()[-M:][::-1]
  # Sum of M highest rewards
  oracle_payoff_t = np.sum([new_y.T[t, choice] for choice in chosen_arms])
  # Append to the list
  oracle_lst.append(oracle_payoff_t)
  true_choice.append(chosen_arms)
oracle_case1 = np.array(oracle_lst)

In [12]:
# Initialize q and C
# vec_q: q (Kp x 1)
np.random.seed(3) #3 #59
vec_q = q[~np.isnan(q)]
# vec_C: C (NK x 1)
np.random.seed(42)
vec_C = c

In [None]:
alpha_to_test = [1]
print('M:', M, 'lda:', lda, 'T', threshold)
results_dict = {alpha: Fed_CLUCB(eta_1 = 0.3, eta_2 = 0.3, alpha_q =1, alpha_c = alpha, X=X_1, Y=(-1 * Y_1 + 30).T, init_q=vec_q, init_c=vec_C,m=M, K = K, FInv=FInv_Init, X_to_X_m=X_to_X_m, X_reshape=X_reshape, oracle=oracle_case1, gammaU=1, gammaD=1)\
                for alpha in alpha_to_test}

M: 29 lda: 0 T 0.5
TRIAL: 999 DONE | cum_regret: 490.04074729292785
Total Communication cost: 2987800
TRIAL: 1999 DONE | cum_regret: 504.3435069442322
Total Communication cost: 5887800
TRIAL: 2999 DONE | cum_regret: 517.5963835743722
Total Communication cost: 8787800
TRIAL: 3999 DONE | cum_regret: 534.1659077467272
Total Communication cost: 11687800
TRIAL: 4999 DONE | cum_regret: 558.8910686176964
Total Communication cost: 14587800
TRIAL: 5999 DONE | cum_regret: 593.5322699432845
Total Communication cost: 17487800
TRIAL: 6999 DONE | cum_regret: 623.7153904031343
Total Communication cost: 20387800
TRIAL: 7999 DONE | cum_regret: 650.4785754418426
Total Communication cost: 23287800
TRIAL: 8999 DONE | cum_regret: 693.4590002494862
Total Communication cost: 26187800
TRIAL: 9999 DONE | cum_regret: 742.7287463471193
Total Communication cost: 29087800
TRIAL: 10999 DONE | cum_regret: 781.8504639828495
Total Communication cost: 31987800
TRIAL: 11999 DONE | cum_regret: 834.2258282672669
Total Com

In [None]:
test_lst = [x['cum_totalCommCost'] for (alpha, x) in results_dict.items()]
df = pd.DataFrame(test_lst[0]) #index 4 is for alpha = 10 
df.to_csv('C1_fedCLUCBwo_CommCost_29.csv', header=False)

In [None]:
# # regret data
test_lst = [make_regret(payoff=x['r_payoff'], oracle=oracle_case1) for (alpha, x) in results_dict.items()]
df = pd.DataFrame(test_lst[0])
df.to_csv('C1_fedCLUCBwo_Regret_29.csv', header=False)

In [None]:
#before fixing
# M: 29 lda: 0 T 0.5
# TRIAL: 999 DONE | cum_regret: 1462.5595078364586
# Total Communication cost: 5784500
# TRIAL: 1999 DONE | cum_regret: 2644.2109365436763
# Total Communication cost: 11584500
# TRIAL: 2999 DONE | cum_regret: 3848.4725725551343
# Total Communication cost: 17384500
# TRIAL: 3999 DONE | cum_regret: 5091.526530856386
# Total Communication cost: 23184500

In [None]:
# #Update A, b, D, d for each selected arm
#         te_q[t] = q[t]
#         te_C_tilde[t] = C_tilde[t]
#         for j in range(5):
#             for chosen_arm in arm_choice[t]:
#                 X_tr_chosen_temp = X_to_X_m(X, t, [chosen_arm], n_arms, n_feature)
#                 X_tr_init_cs = np.kron(np.identity(n = K),X_tr_chosen_temp[0].reshape(-1,1))
#                 X_1_tr_chosen =  X_reshape(X_tr_chosen_temp, X_tr_init_cs, t, K, n_arms, n_feature)
            
#                 #x_tr for c case
#                 X_tilde_chosen = FInv.dot(X_to_X_m(X, t, [chosen_arm], n_arms, n_feature).flatten())
#                 # q_block_chosen = (block_diag(*[q[t].reshape((K, n_feature)) for _ in np.arange(n_arms)])).T
#                 q_block_chosen = (block_diag(*[te_q[t].reshape((K, n_feature)) for _ in np.arange(n_arms)])).T
            
#               #Update  
#                 X_q = FInv.dot(te_C_tilde[t]).dot(X_1_tr_chosen.T)
#                 X_C_Tilde = q_block_chosen.T.dot(X_tilde_chosen)
#                 A = A + np.outer(X_q, X_q)
#                 b = b + Y[t, chosen_arm] * X_q
#                 D = D + np.outer(X_C_Tilde, X_C_Tilde)           
#                 d = d + Y[t, chosen_arm] * X_C_Tilde
        
#             #inverse calculation
#             inv_A = np.linalg.inv(A)
#             inv_D = np.linalg.inv(D)
#             # q[t + 1] = inv_A.dot(b)
#             # C_tilde[t + 1] = inv_D.dot(d)
#             te_q[t] = inv_A.dot(b)
#             te_C_tilde[t] = inv_D.dot(d)
#         q[t + 1] = te_q[t]
#         C_tilde[t + 1] = te_C_tilde[t]

In [None]:
# 55.15406097592552 200
# 114.8111138963292 3100
# 183.0768480123816 6000
# 248.3580568245746 8900
# 267.29491803294195 13500
# 268.3516718607807 19300
# 269.57818272199074 25100
# 271.2947815811013 30900
# 272.4592831337542 36700
# 273.86301047219683 42500
# 275.21604333608764 48300
# 276.66764601735986 54100
# 278.63329419340454 59900
# 280.4678674639548 65700
# 282.2424565978083 71500
# 283.90520242056067 77300
# 285.10465733644725 83100
# 287.65371280579296 88900
# 289.22537398508416 94700
# 290.9280543765692 100500
# 291.9699077144874 106300
# 293.8109194887885 112100
# 295.573853339366 117900
# 297.33565120601475 123700
# 299.15243086237103 129500
# 300.82503076792034 135300
# 302.4343860924079 141100
# 304.0327756427158 146900
# 306.5369731263834 152700
# 308.38888753156465 158500
# 310.164500055526 164300
# 311.28218430465296 170100
# 313.1998664327633 175900
# 314.9943041671996 181700
# 316.20310525400566 187500
# 317.7963529797213 193300
# 319.4268201296704 199100
# 320.3885968631429 204900
# 322.2111678953204 210700
# 323.40215238458734 216500
# 325.0611011907497 222300
# 326.3357271283111 228100
# 327.3997635999691 233900
# 328.82011792957167 239700
# 330.04982503235874 245500
# 331.4810107927615 251300
# 332.1962316983845 257100
# 333.4353240926906 262900
# 334.6459616966856 268700
# 335.88077689371664 274500
# 337.2369418488231 280300
# 339.05224891627233 286100
# 340.4532284477658 291900
# 342.1428072979302 297700
# 343.6569040832185 303500
# 344.90613648456946 309300
# 345.5625853992278 315100
# 346.68271945088406 320900
# 347.88632031853456 326700
# 348.9352449365841 332500
# 350.2234206466882 338300
# 351.2513485767839 344100
# 352.4432962380689 349900
# 354.100377194595 355700
# 355.2858960580144 361500
# 356.30988646978903 367300
# 357.44522036138244 373100
# 359.2476056969442 378900
# 360.90903002123315 384700
# 362.0413216333418 390500
# 363.0222647307714 396300
# 363.9616743421146 402100
# 364.9986199864545 407900
# 366.1183131719929 413700
# 367.0790495283877 419500
# 368.39245985902335 425300
# 370.4279489694685 431100
# 371.6648368904978 436900
# 373.3882240937543 442700
# 374.6962164497646 448500
# 375.62479160733744 454300
# 376.7176049411076 460100
# 378.15565538872113 465900
# 379.47111732042265 471700
# 380.9300987480244 477500
# 382.039179919067 483300
# 382.9961300064961 489100
# 384.0791198696953 494900
# 385.91683018892127 500700
# 388.0381287425023 506500
# 389.29212144364936 512300
# 390.6914613920817 518100
# 391.87658701205135 523900
# 393.0921193822501 529700
# 394.91420070649326 535500
# 396.051528738898 541300
# 397.37262011971393 547100
# 399.0009003094527 552900
# 400.2855444221402 558700
# 401.2799108875152 564500

In [None]:
# M: 29 lda: 0 T 0.5
# TRIAL: 49 DONE | cum_regret: 453.69693595616025
# Total Communication cost: 232800
# TRIAL: 99 DONE | cum_regret: 462.0488809242866
# Total Communication cost: 377800
# TRIAL: 149 DONE | cum_regret: 470.5800888813603
# Total Communication cost: 522800
# TRIAL: 199 DONE | cum_regret: 472.3803961182125
# Total Communication cost: 667800
# TRIAL: 249 DONE | cum_regret: 474.30673803317126
# Total Communication cost: 812800
# TRIAL: 299 DONE | cum_regret: 475.6049232914372
# Total Communication cost: 957800
# TRIAL: 349 DONE | cum_regret: 476.7737590641307
# Total Communication cost: 1102800
# TRIAL: 399 DONE | cum_regret: 479.60803989965837
# Total Communication cost: 1247800
# TRIAL: 449 DONE | cum_regret: 479.81206041020926
# Total Communication cost: 1392800
# TRIAL: 499 DONE | cum_regret: 480.58317247281354
# Total Communication cost: 1537800
# TRIAL: 549 DONE | cum_regret: 481.00398259950975
# Total Communication cost: 1682800
# TRIAL: 599 DONE | cum_regret: 481.60037256262046
# Total Communication cost: 1827800
# TRIAL: 649 DONE | cum_regret: 482.60762401344783
# Total Communication cost: 1972800
# TRIAL: 699 DONE | cum_regret: 483.3292485026203
# Total Communication cost: 2117800
# TRIAL: 749 DONE | cum_regret: 483.83437012123716
# Total Communication cost: 2262800
# TRIAL: 799 DONE | cum_regret: 485.0707560018169
# Total Communication cost: 2407800
# TRIAL: 849 DONE | cum_regret: 487.65404343097134
# Total Communication cost: 2552800
# TRIAL: 899 DONE | cum_regret: 488.6555308978984
# Total Communication cost: 2697800
# TRIAL: 949 DONE | cum_regret: 489.27219670104716
# Total Communication cost: 2842800
# TRIAL: 999 DONE | cum_regret: 490.04074729292785
# Total Communication cost: 2987800