In [1]:
import numpy as np

import math

In [2]:
# Number of arms; sizes the per-arm cumulative-reward and pull-count arrays.
K = 2
# True mean reward of each arm, indexed by arm id. The grid functions
# overwrite means[1] while sweeping over mu.
means = [0, 0.1]

# Standard deviation of the Gaussian reward noise. The grid functions
# reassign this global while sweeping over sigma.
sigma = 1
# Confidence parameter that appears inside the log term of beta().
delta = 0.05
# Base of the exponential attack margin (theta ** number of pulls).
theta = math.e
# Arm whose rewards are never attacked; every other arm is pushed below it.
target_arm = 0


def beta(n, arg_delta = None, arg_sigma = None, arg_K = None):
    """Return the confidence radius used by the attacks after ``n`` pulls.

    Computes ``sqrt(2 * sigma**2 / n * log(pi**2 * K * n**2 / (3 * delta)))``.

    Parameters
    ----------
    n : number of pulls (must be non-zero, it is used as a divisor).
    arg_delta, arg_sigma, arg_K : optional overrides for the confidence
        parameter, the noise standard deviation and the number of arms.
        When left as ``None`` the *current* module-level ``delta``,
        ``sigma`` and ``K`` are read at call time, so code that reassigns
        the global ``sigma`` keeps influencing the result.

    Returns
    -------
    The radius as a numpy float.
    """
    # Resolve overrides lazily: the globals are only touched when the caller
    # did not supply a value (previously the arguments were ignored entirely).
    delta_val = delta if arg_delta is None else arg_delta
    sigma_val = sigma if arg_sigma is None else arg_sigma
    k_val = K if arg_K is None else arg_K

    radius_sq = 2 * sigma_val * sigma_val / n
    radius_sq *= np.log(math.pi ** 2 * k_val * (n ** 2) / (3 * delta_val))
    return np.sqrt(radius_sq)

def generate_reward(a):
    """Draw one Gaussian reward for arm ``a``.

    The draw is centred on ``means[a]`` and uses the module-level ``sigma``
    as its standard deviation.
    """
    arm_mean = means[a]
    return np.random.normal(loc=arm_mean, scale=sigma)



In [3]:
# select arm according to UCB
def select_arm(cum_rew, num_pulls, t):
    """Return the arm with the largest UCB index at round ``t``.

    The index of an arm is its empirical mean (``cum_rew / num_pulls``) plus
    the bonus ``3 * sigma * sqrt(log(t) / num_pulls)``; ``sigma`` is the
    module-level value. Every entry of ``num_pulls`` is used as a divisor.
    """
    exploration_bonus = 3 * sigma * np.sqrt(np.log (t) / num_pulls)
    ucb_index = cum_rew / num_pulls + exploration_bonus
    return np.argmax(ucb_index)

# Update cum reward and compute attack using baseline Jun et al.
def compute_round_baseline(a, t, cum_reward, num_pulls, Delta):
    """Play arm ``a`` once and apply the baseline attack if needed.

    ``cum_reward`` and ``num_pulls`` are updated in place; after the call
    ``cum_reward`` holds the post-attack totals.

    Parameters
    ----------
    a : index of the pulled arm.
    t : round index (not used by this function).
    cum_reward, num_pulls : per-arm arrays, mutated in place.
    Delta : extra margin kept between the target arm and the attacked arm.

    Returns
    -------
    The amount removed from ``cum_reward[a]`` in this round, or 0 when the
    target arm was pulled or no correction was necessary.
    """
    # The pull itself is recorded the same way for every arm.
    cum_reward[a] += generate_reward(a)
    num_pulls[a] += 1
    if a == target_arm:
        return 0

    means_now = cum_reward / num_pulls
    radius = 2 * beta(num_pulls[target_arm])
    slack = means_now[target_arm] - means_now[a] - radius - Delta

    # Only a strictly negative slack triggers an attack.
    if not (slack < 0):
        return 0

    # Remove enough total reward to bring the arm's mean down by |slack|.
    corruption = - slack * num_pulls[a]
    cum_reward[a] -= corruption
    return corruption

# Update cum reward and compute attack using optimal strategy
def compute_round(a, t, cum_reward, num_pulls):
    """Play arm ``a`` once and apply the exponential-margin attack if needed.

    ``cum_reward`` and ``num_pulls`` are updated in place; after the call
    ``cum_reward`` holds the post-attack totals.

    Parameters
    ----------
    a : index of the pulled arm.
    t : round index (not used by this function).
    cum_reward, num_pulls : per-arm arrays, mutated in place.

    Returns
    -------
    The amount removed from ``cum_reward[a]`` in this round, or 0 when the
    target arm was pulled or no correction was necessary.
    """
    # The pull itself is recorded the same way for every arm.
    cum_reward[a] += generate_reward(a)
    num_pulls[a] += 1
    if a == target_arm:
        return 0

    means_now = cum_reward / num_pulls
    radius = 2 * beta(num_pulls[target_arm])
    # Margin grows as theta ** (pulls of the attacked arm).
    margin = 3 * sigma * (theta ** num_pulls[a])
    slack = means_now[target_arm] - means_now[a] - radius - margin

    # Only a strictly negative slack triggers an attack.
    if not (slack < 0):
        return 0

    # Remove enough total reward to bring the arm's mean down by |slack|.
    corruption = - slack * num_pulls[a]
    cum_reward[a] -= corruption
    return corruption


def do_trial_optimal(T):
    """Run one UCB trial of ``T`` rounds under the exponential-margin attack.

    Every arm is pulled once during initialisation (target arm first, then
    the remaining arms in increasing index order); afterwards the arm is
    chosen by ``select_arm``. Each round is processed by ``compute_round``.

    Returns
    -------
    (cum_attack, non_target_pulls): total amount of reward removed by the
    attacker and the number of rounds in which a non-target arm was pulled.
    """
    cum_attack = 0
    non_target_pulls = 0
    cum_reward = np.zeros(shape = (K,))
    num_pulls = np.zeros(shape = (K,))

    # Initialisation. Enumerating the non-target arms explicitly (instead of
    # assuming they are arms 1..K-1) keeps every arm pulled exactly once even
    # when target_arm is not 0; otherwise select_arm would divide by zero.
    compute_round(target_arm, 0, cum_reward, num_pulls)
    other_arms = [arm for arm in range(K) if arm != target_arm]
    for t, a in enumerate(other_arms, start=1):
        cum_attack += compute_round(a, t, cum_reward, num_pulls)
        non_target_pulls += 1

    for t in range(K, T):
        a = select_arm(cum_reward, num_pulls, t)
        if a != target_arm:
            non_target_pulls += 1
        # Progress message every million rounds.
        if t % 10**6 == 0:
            print ("round ", t)
        cum_attack += compute_round(a, t, cum_reward, num_pulls)
    return cum_attack, non_target_pulls

def do_trial_baseline(T, Delta):
    """Run one UCB trial of ``T`` rounds under the baseline attack.

    Every arm is pulled once during initialisation (target arm first, then
    the remaining arms in increasing index order); afterwards the arm is
    chosen by ``select_arm``. Each round is processed by
    ``compute_round_baseline`` with the margin ``Delta``.

    Returns
    -------
    (cum_attack, non_target_pulls): total amount of reward removed by the
    attacker and the number of rounds in which a non-target arm was pulled.
    """
    cum_attack = 0
    non_target_pulls = 0
    cum_reward = np.zeros(shape = (K,))
    num_pulls = np.zeros(shape = (K,))

    # Initialisation. Enumerating the non-target arms explicitly (instead of
    # assuming they are arms 1..K-1) keeps every arm pulled exactly once even
    # when target_arm is not 0; otherwise select_arm would divide by zero.
    compute_round_baseline(target_arm, 0, cum_reward, num_pulls, Delta)
    other_arms = [arm for arm in range(K) if arm != target_arm]
    for t, a in enumerate(other_arms, start=1):
        cum_attack += compute_round_baseline(a, t, cum_reward, num_pulls, Delta)
        non_target_pulls += 1

    for t in range(K, T):
        a = select_arm(cum_reward, num_pulls, t)
        if a != target_arm:
            non_target_pulls += 1
        cum_attack += compute_round_baseline(a, t, cum_reward, num_pulls, Delta)
    return cum_attack, non_target_pulls


def doGridUCBandCompare(nTrial, T):
    """Sweep sigma and mu for UCB and print averages for both attacks.

    For every (sigma, mu) pair the module-level ``sigma`` and ``means[1]``
    are overwritten, ``nTrial`` trials of ``T`` rounds are run, and the mean
    attack cost and mean number of non-target pulls are printed. The first
    sweep uses ``do_trial_optimal``; the second uses ``do_trial_baseline``
    with ``Delta`` set to the current sigma. The globals are left at the
    last grid values when the function returns.
    """
    global sigma

    sigma_grid = [0.1, 1, 2]
    mu_grid = [0.1, 1, 2]

    def _trial_averages(run_trial, *trial_args):
        # Accumulate sequentially, then divide by the number of trials.
        attack_total = 0
        pulls_total = 0
        for _trial in range(nTrial):
            attack, pulls = run_trial(*trial_args)
            attack_total += attack
            pulls_total += pulls
        return attack_total / nTrial, pulls_total / nTrial

    print ("this work..")
    for sigma in sigma_grid:
        for mu in mu_grid:
            means[1] = mu
            print ("sigma, mu = ", sigma, mu)
            avg_attack, avg_pulls = _trial_averages(do_trial_optimal, T)
            print (avg_attack, avg_pulls)

    print ("\n jun et al. ")
    for sigma in sigma_grid:
        for mu in mu_grid:
            means[1] = mu
            avg_attack, avg_pulls = _trial_averages(do_trial_baseline, T, sigma)
            # In this sweep the header is printed after the trials finish.
            print ("sigma, mu = ", sigma, mu)
            print (avg_attack, avg_pulls)

    print ("finished grid...!!!")

In [5]:
# Thompson Sampling

# select arm according to TS
def selectArmTS(cumRew, nPulls):
    """Return the arm with the largest Thompson sample.

    One Gaussian value is drawn per arm, centred on the empirical mean
    ``cumRew / nPulls`` with ``1/nPulls`` passed as numpy's ``scale``.
    Every entry of ``nPulls`` is used as a divisor.
    """
    sample_center = cumRew / nPulls
    # NOTE(review): numpy's `scale` is a standard deviation, so this samples
    # with std 1/n rather than variance 1/n - confirm that this is intended.
    sample_scale = 1/nPulls
    draws = np.random.normal(loc=sample_center, scale=sample_scale)
    return np.argmax(draws)

# cumRew holds the post-attack cumulative rewards
def doRoundTS(a, t, cumRew, nPulls):
    """Play arm ``a`` once and apply the Thompson-sampling attack if needed.

    ``cumRew`` and ``nPulls`` are updated in place; after the call
    ``cumRew`` holds the post-attack totals.

    Parameters
    ----------
    a : index of the pulled arm.
    t : round index (not used by this function).
    cumRew, nPulls : per-arm arrays, mutated in place.

    Returns
    -------
    The amount removed from ``cumRew[a]`` in this round, or 0 when the
    target arm was pulled or no correction was necessary.
    """
    # The pull itself is recorded the same way for every arm.
    cumRew[a] += generate_reward(a)
    nPulls[a] += 1
    if a == target_arm:
        return 0

    means_now = cumRew / nPulls
    radius = 2 * beta(nPulls[target_arm])
    # Margin grows as theta ** (pulls of the attacked arm).
    growth_margin = 2.9 * (theta ** nPulls[a])
    # Constant offset that depends only on K and delta.
    fixed_margin = 2 * np.sqrt(np.log( np.pi**2 * K / (3 * delta) ))
    slack = means_now[target_arm] - means_now[a] - radius - growth_margin - fixed_margin

    # Only a strictly negative slack triggers an attack.
    if not (slack < 0):
        return 0

    # Remove enough total reward to bring the arm's mean down by |slack|.
    corruption = - slack * nPulls[a]
    cumRew[a] -= corruption
    return corruption

def doSingleTrialTS(T):
    """Run one Thompson-sampling trial of ``T`` rounds under attack.

    Every arm is pulled once during initialisation (target arm first, then
    the remaining arms in increasing index order); afterwards the arm is
    chosen by ``selectArmTS``. Each round is processed by ``doRoundTS``.

    Returns
    -------
    (cum_attack, non_target_pulls): total amount of reward removed by the
    attacker and the number of rounds in which a non-target arm was pulled.
    """
    cum_attack = 0
    non_target_pulls = 0
    cum_reward = np.zeros(shape = (K,))
    num_pulls = np.zeros(shape = (K,))

    # Initialisation. Enumerating the non-target arms explicitly (instead of
    # assuming they are arms 1..K-1) keeps every arm pulled exactly once even
    # when target_arm is not 0; otherwise selectArmTS would divide by zero.
    doRoundTS(target_arm, 0, cum_reward, num_pulls)
    other_arms = [arm for arm in range(K) if arm != target_arm]
    for t, a in enumerate(other_arms, start=1):
        cum_attack += doRoundTS(a, t, cum_reward, num_pulls)
        non_target_pulls += 1

    for t in range(K, T):
        a = selectArmTS(cum_reward, num_pulls)
        if a != target_arm:
            non_target_pulls += 1
        # Progress message every million rounds.
        if t % 10**6 == 0:
            print ("round ", t)
        cum_attack += doRoundTS(a, t, cum_reward, num_pulls)
    return cum_attack, non_target_pulls

def doGridTS(nTrial = 10, T = 10**4):
    """Sweep sigma and mu for Thompson sampling and print the averages.

    For every (sigma, mu) pair the module-level ``sigma`` and ``means[1]``
    are overwritten, ``nTrial`` trials of ``T`` rounds are run with
    ``doSingleTrialTS``, and the mean attack cost and mean number of
    non-target pulls are printed. The globals are left at the last grid
    values when the function returns.
    """
    global sigma

    sigma_grid = [0.1, 1, 2]
    mu_grid = [0.1, 1, 2]

    print("TS attack")
    for sigma in sigma_grid:
        for mu in mu_grid:
            means[1] = mu
            print ("sigma, mu = ", sigma, mu)

            # Accumulate sequentially, then divide by the number of trials.
            attack_total = 0
            pulls_total = 0
            for _trial in range(nTrial):
                attack, pulls = doSingleTrialTS(T)
                attack_total += attack
                pulls_total += pulls
            print (attack_total / nTrial, pulls_total / nTrial)

    print ("\n")
    print ("finished grid...!!!")


In [6]:
# Results for Thompson sampling.
# The same grid is run three times in a row. No random seed is set anywhere
# in this notebook, so each repetition prints slightly different averages.
doGridTS(nTrial = 10, T = 10**4)

doGridTS(nTrial = 10, T = 10**4)

doGridTS(nTrial = 10, T = 10**4)

TS attack
sigma, mu =  0.1 0.1
12.944895498924046 1.0
sigma, mu =  0.1 1
13.933744137733873 1.0
sigma, mu =  0.1 2
14.93003711166433 1.0
sigma, mu =  1 0.1
18.822941451443967 1.0
sigma, mu =  1 1
19.584302510945875 1.0
sigma, mu =  1 2
20.734678928854148 1.0
sigma, mu =  2 0.1
24.835133699062858 1.0
sigma, mu =  2 1
25.54096567088045 1.0
sigma, mu =  2 2
27.0702423356027 1.0


finished grid...!!!
TS attack
sigma, mu =  0.1 0.1
13.005197794015643 1.0
sigma, mu =  0.1 1
13.9514869237256 1.0
sigma, mu =  0.1 2
14.909866260599282 1.0
sigma, mu =  1 0.1
18.60645032153848 1.0
sigma, mu =  1 1
19.56238408987872 1.0
sigma, mu =  1 2
20.428627628140294 1.0
sigma, mu =  2 0.1
25.892496111154706 1.0
sigma, mu =  2 1
25.978463921278102 1.0
sigma, mu =  2 2
27.419651512039955 1.0


finished grid...!!!
TS attack
sigma, mu =  0.1 0.1
13.069700755978877 1.0
sigma, mu =  0.1 1
13.91943336730489 1.0
sigma, mu =  0.1 2
14.959530943081859 1.0
sigma, mu =  1 0.1
19.020966310471245 1.0
sigma, mu =  1 1
19.0

In [7]:
# Results for UCB.
# Prints the exponential-margin attack ("this work..") first and the
# baseline attack ("jun et al.") second, each over the same sigma/mu grid.
# Note that the call overwrites the globals `sigma` and `means[1]`.

doGridUCBandCompare(nTrial = 10, T = 10 ** 4)

this work..
sigma, mu =  0.1 0.1
1.4281073171384822 1.0
sigma, mu =  0.1 1
2.4136289087204306 1.0
sigma, mu =  0.1 2
3.42046393080565 1.0
sigma, mu =  1 0.1
14.219267059455845 1.0
sigma, mu =  1 1
15.887003972961015 1.0
sigma, mu =  1 2
16.906020612697887 1.0
sigma, mu =  2 0.1
29.15635058609837 1.0
sigma, mu =  2 1
30.224959914293624 1.0
sigma, mu =  2 2
31.061504846635398 1.0

 jun et al. 
sigma, mu =  0.1 0.1
11.560503895608075 54.8
sigma, mu =  0.1 1
61.348866604622685 54.8
sigma, mu =  0.1 2
115.68003476367292 54.8
sigma, mu =  1 0.1
68.02108126168851 53.5
sigma, mu =  1 1
116.37273437380152 55.0
sigma, mu =  1 2
141.17496138010637 44.2
sigma, mu =  2 0.1
135.04354380261088 54.1
sigma, mu =  2 1
177.01701689877058 55.0
sigma, mu =  2 2
232.5134614112441 55.1
finished grid...!!!


In [None]:
# Hand-entered 3x3 result tables.
# Presumably average attack cost with rows = sigma in [0.1, 1, 2] and
# columns = mu in [0.1, 1, 2] - TODO confirm against the run that produced them.
# NOTE(review): the `baseline` numbers do not match the "jun et al." output
# recorded in this notebook (e.g. 23.6 here vs. 11.56 printed), so they
# presumably come from a different run or setting - verify before reuse.
baseline = [[23.6, 129.4, 247.3], [114.4, 241.7, 360.3], [239.4, 367.6, 475.5]]

# Close to the "this work.." averages printed by the UCB grid.
UCB = [[1.3, 2.4, 3.6], [14.5, 15.9, 16.8], [30.3, 30.7, 31.0]]

# Close to the averages printed by the Thompson-sampling grid.
TS = [[13.0, 13.9, 15.0], [19.0, 19.7, 20.6], [23.8, 25.0, 26.7]]