In [None]:
import operator
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import savefig

In [None]:
# Success probability of each arm, keyed by arm name.
params = dict(A=0.5, B=0.6, C=0.2)

# Number of plays to simulate per strategy.
nb_steps = 10 ** 4

# Separator strings: a rule of underscores with a leading (sep1) or
# trailing (sep2) newline.
sep1 = "\n_____________________________"
sep2 = "_____________________________\n"

In [None]:
def play_multi_armed_bandit(chosen_arm, params = params):
    """Pull one arm of the Bernoulli bandit and return the reward.

    Parameters
    ----------
    chosen_arm : hashable
        Key of the arm to play; must be a key of ``params``.
    params : dict
        Maps each arm to its success probability. The default is the
        module-level ``params`` object bound when this cell was executed.

    Returns
    -------
    numpy integer
        A single Bernoulli draw: 1 with probability ``params[chosen_arm]``,
        otherwise 0.

    Raises
    ------
    KeyError
        If ``chosen_arm`` is not a key of ``params`` (previously this
        surfaced as an UnboundLocalError on the return statement).
    """
    if chosen_arm not in params:
        raise KeyError("unknown arm {!r}; expected one of {}".format(chosen_arm, list(params)))

    # size=1 followed by [0] keeps the return type a numpy scalar.
    return np.random.binomial(1, params[chosen_arm], size=1)[0]

In [None]:
def get_UCB(choices, results):
    """Compute the UCB1 index of every arm that appears in ``choices``.

    For an arm played ``n`` times out of ``t = len(choices)`` plays, the
    index is ``mean(rewards of the arm) + sqrt(2 * ln(t) / n)``.

    Parameters
    ----------
    choices : sequence
        Arm played at each step.
    results : sequence
        Reward obtained at each step; ``results[i]`` belongs to
        ``choices[i]``. Entries beyond ``len(choices)`` are ignored.

    Returns
    -------
    dict
        Maps each played arm to its index. Keys are inserted in order of
        first appearance in ``choices``, so callers that break ties by
        iteration order (e.g. ``max`` over the items) behave the same on
        every run instead of depending on set iteration order.

    Raises
    ------
    IndexError
        If ``results`` is shorter than ``choices``.
    """
    t = len(choices)
    if t == 0:
        # Nothing played yet; also avoids evaluating log(0) below.
        return {}
    if len(results) < t:
        raise IndexError("results must hold at least one reward per choice")

    # Group the rewards per arm in a single pass over the history.
    rewards_by_arm = {}
    for arm, reward in zip(choices, results):
        rewards_by_arm.setdefault(arm, []).append(reward)

    exploration = 2*np.log(t)  # identical for every arm, so computed once
    UCB = {}
    for arm, arm_results in rewards_by_arm.items():
        UCB[arm] = np.mean(arm_results) + np.sqrt(exploration / len(arm_results))

    return UCB

In [None]:
def solve_problem(nb_step, algo, arms = ("A", "B", "C")):
    """Simulate a bandit run with the given arm-selection strategy.

    Every arm is first played once, in the iteration order of ``arms``.
    The strategy then picks the arm for each remaining step until
    ``nb_step`` plays have been made in total. If ``nb_step`` is smaller
    than the number of arms, only the initial round is played.

    Parameters
    ----------
    nb_step : int
        Total number of plays to simulate.
    algo : str
        ``"UCB"`` plays the arm with the highest index returned by
        ``get_UCB``; ``"random"`` draws an arm uniformly from ``arms``.
    arms : iterable
        Arms available to the strategy. Rewards are drawn by
        ``play_multi_armed_bandit`` with its default ``params``, so every
        arm must be a key of that mapping.

    Returns
    -------
    (list, list)
        ``choices`` and ``results``: the arm played and the reward obtained
        at each step, index-aligned.

    Raises
    ------
    ValueError
        If ``algo`` is not a supported strategy or ``arms`` is empty.
    """
    if algo not in ("UCB", "random"):
        raise ValueError("unknown algo {!r}; expected 'UCB' or 'random'".format(algo))

    # Materialise once: callers pass dict views, which random.choice cannot index.
    arm_list = list(arms)
    if not arm_list:
        raise ValueError("arms must contain at least one arm")

    choices = []
    results = []

    # Initial round: one play per arm, so each arm has a defined mean reward.
    for choice in arm_list:
        choices.append(choice)
        results.append(play_multi_armed_bandit(choice))

    # Continue from the number of plays already made, bounded by the
    # nb_step argument (not the module-level nb_steps).
    for _ in range(len(results), nb_step):
        if algo == "UCB":
            UCB = get_UCB(choices, results)
            chosen_arm = max(UCB.items(), key=operator.itemgetter(1))[0]
        else:
            chosen_arm = random.choice(arm_list)

        choices.append(chosen_arm)
        results.append(play_multi_armed_bandit(chosen_arm))

    return choices, results

In [None]:
def get_means(choices, results):
    """Return the mean reward of each distinct arm found in ``choices``.

    ``results[i]`` is taken to be the reward of ``choices[i]``; the two
    sequences must have the same length (checked with an assertion).
    The result is a dict mapping arm -> mean of that arm's rewards.
    """
    assert (len(choices) == len(results)), "the choices list and results list must have the same length"

    return {
        arm: np.mean([results[idx] for idx, played in enumerate(choices) if played == arm])
        for arm in set(choices)
    }

In [None]:
def plot_means_and_ucb(choices, results):
    """Scatter-plot each arm's mean reward and UCB index on the current figure.

    The UCB index of every arm is drawn as a red horizontal tick. Mean
    rewards are drawn as circles: red for the arm with the highest UCB
    index, blue for all the others.

    Parameters
    ----------
    choices : sequence
        Arm played at each step.
    results : sequence
        Reward obtained at each step, index-aligned with ``choices``.
    """
    ucb = get_UCB(choices, results)
    action_max = max(ucb.items(), key=operator.itemgetter(1))[0]
    # Compare whole arm names: set(action_max) would split a string name
    # into characters and only worked for one-character names.
    action_other = [arm for arm in ucb if arm != action_max]

    means = get_means(choices, results)
    means_other = [means[x] for x in action_other]

    # plot
    plt.ylim(0, 2.2)
    # Lists rather than dict views, so the plotting call receives plain sequences.
    plt.scatter(list(ucb.keys()), list(ucb.values()), marker="_", alpha=0.5, s=500, color="r")
    plt.scatter(action_other, means_other, marker="o", alpha=0.5, s=500, color="b")
    plt.scatter([action_max], [means[action_max]], marker="o", alpha=0.5, s=500, color="r")

In [None]:
# Seed both random sources so that Restart & Run All reproduces the same
# simulation: rewards are drawn through np.random, and the "random"
# strategy picks arms through the stdlib `random` module.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

# One run per strategy over the same arms.
choices_ucb, results_ucb = solve_problem(nb_steps, "UCB", params.keys())
choices_random, results_random = solve_problem(nb_steps, "random", params.keys())

In [None]:
# Render one figure per selected step of the UCB run and save each as a PNG
# under gif_ucb/.
os.makedirs("gif_ucb", exist_ok=True)  # savefig does not create missing directories

threshold = 50
for i in range(1, 120):
    # Every step up to `threshold`, then one frame every 20 steps.
    if i <= threshold:
        t = i
    else:
        t = threshold + (i - threshold)*20

    plot_means_and_ucb(choices_ucb[:t], results_ucb[:t])
    plt.title("step : {}".format(t))
    savefig('gif_ucb/ucb_step_%04d.png' % t)
    plt.show()
    # Make sure the next frame starts from an empty figure even on backends
    # where show() leaves the current figure open.
    plt.close()

In [None]:
def evaluate_results(results, params):
    """Score a run against always playing the best arm.

    Returns a tuple ``(score, target_score, regret)``: ``score`` is the sum
    of ``results``, ``target_score`` is the largest value in ``params``
    multiplied by ``len(results)``, and ``regret`` is
    ``target_score - score``.
    """
    top_rate = max(params.values())
    n_plays = len(results)
    achieved = np.sum(results)

    ideal = top_rate * n_plays
    return achieved, ideal, ideal - achieved

In [None]:
def _print_play_counts(title, choices, params):
    """Print ``title``, then how many times each arm of ``params`` occurs in ``choices``."""
    print(title)
    for key in params.keys():
        played = sum(1 for choice in choices if choice == key)
        print("Arm {} : played {} times".format(key, played))
    print("\n")


def compare_random_ucb(results_ucb, choices_ucb, results_random, choices_random, params):
    """Print a report comparing the UCB run with the random run.

    The report lists the arm parameters, the number of times each strategy
    played each arm, and the score and regret of both strategies as
    computed by ``evaluate_results``.

    Parameters
    ----------
    results_ucb, choices_ucb : sequence
        Rewards and arms of the UCB run, index-aligned.
    results_random, choices_random : sequence
        Rewards and arms of the random run, index-aligned.
    params : dict
        Maps each arm to its success probability.
    """
    score_ucb, target_score, regret_ucb = evaluate_results(results_ucb, params)
    # The single target printed below is the one from the random run; the two
    # targets are equal whenever both runs have the same length.
    score_random, target_score, regret_random = evaluate_results(results_random, params)

    print("--- Params ---")
    for key in params.keys():
        print("Arm {}, conversion rate : {}".format(key, params[key]))
    print("\n")

    # Each strategy is counted over its own choices; the random run used to be
    # indexed with the length of the UCB run.
    _print_play_counts("--- Strategy UCB ---", choices_ucb, params)
    _print_play_counts("--- Strategy random ---", choices_random, params)

    print("--- Scores ---")
    print("score target : {}".format(round(target_score)))
    print("score ucb : {}".format(round(score_ucb)))
    print("score random : {}\n".format(round(score_random)))

    print("\n--- Regret ---")
    print("regret ucb : {}".format(round(regret_ucb)))
    print("regret random : {}\n".format(round(regret_random)))

In [None]:
# Compare both strategies over their first t plays.
t = 10000
compare_random_ucb(
    results_ucb=results_ucb[:t],
    choices_ucb=choices_ucb[:t],
    results_random=results_random[:t],
    choices_random=choices_random[:t],
    params=params,
)

In [None]:
# Expected regret for a fixed split of 10000 plays: the best rate (0.6) on
# every play, minus each arm's rate times the number of times it was played.
# NOTE(review): 3312 / 3376 / 3312 look like per-arm play counts copied from
# an earlier run's output — they will not match a fresh run; confirm.
0.6*10000 - (0.5*3312 + 0.6*3376 + 0.2*3312)

In [None]:
# Step-by-step regret computation (same arithmetic as evaluate_results).
# This cell used to read a name `results` that no cell defines, so it failed
# on a fresh kernel. The surrounding cells all inspect the random strategy,
# so the random run is used here.
# NOTE(review): confirm results_random is the intended input.
best_option_perf = max(params.values())
experience_length = len(results_random)

score = np.sum(results_random)
target_score = best_option_perf * experience_length

regret = target_score - score

In [None]:
# Highest success probability among the arms; displayed as the cell output.
best_option_perf = max(params[arm] for arm in params)
best_option_perf

In [None]:
# Rate 0.2 (arm C's value in params) times 3312 plays.
# NOTE(review): 3312 is presumably arm C's play count from an earlier run — confirm.
0.2*3312

In [None]:
# Rewards of the random run on the steps where arm A was chosen.
result_A = [reward for idx, reward in enumerate(results_random) if choices_random[idx] == "A"]
# Mean reward times number of plays, i.e. the total reward earned on arm A.
np.mean(result_A)*len(result_A)

In [None]:
# Rewards of the random run on the steps where arm B was chosen.
result_B = [reward for idx, reward in enumerate(results_random) if choices_random[idx] == "B"]
# Mean reward times number of plays, i.e. the total reward earned on arm B.
np.mean(result_B)*len(result_B)

In [None]:
# Rewards of the random run on the steps where arm C was chosen.
result_C = [reward for idx, reward in enumerate(results_random) if choices_random[idx] == "C"]
# Mean reward times number of plays, i.e. the total reward earned on arm C.
np.mean(result_C)*len(result_C)

In [None]:
# Number of times the random run played arm A.
len(result_A)

In [None]:
# Number of times the random run played arm B.
len(result_B)

In [None]:
# Number of times the random run played arm C.
len(result_C)

In [None]:
# Observed mean reward of arm A in the random run (params sets its rate to 0.5).
np.mean(result_A)

In [None]:
# Observed mean reward of arm B in the random run (params sets its rate to 0.6).
np.mean(result_B)

In [None]:
# Observed mean reward of arm C in the random run (params sets its rate to 0.2).
np.mean(result_C)

In [None]:
# Observed regret of the random run: a target of 6000 minus the total reward
# earned on each arm.
# NOTE(review): 6000 presumably stands for 0.6 * 10000 (best rate times number
# of plays); it is hard-coded, so it goes stale if params or the run length
# change — confirm.
6000 - np.mean(result_A)*len(result_A) - np.mean(result_B)*len(result_B) - np.mean(result_C)*len(result_C)