In [1]:
#include("MAB_module.jl")
include("environment.jl")
include("algorithm_base.jl")
include("eps_greedy.jl")
include("rs.jl")
include("lsx.jl")
#include("meta-bandit.jl")
include("ucb1tuned.jl")

update! (generic function with 4 methods)

In [2]:
using Plots
using Statistics
#using Base.Threads

In [3]:
"""
    MYRS(; env, r, gamma, alpha_r, n=0., opt=false, test_name="")

Mutable state of the `MYRS` bandit algorithm defined in this notebook.

The four per-arm vectors are all created as `zeros(env.arm_num)`; the remaining
fields are taken from the keyword arguments unchanged.
"""
mutable struct MYRS <: Algorithm
    env::Environment                # environment the agent plays against
    actionValues::Vector{Float64}   # per-arm value maintained by `calc_value`
    counts::Vector{Float64}         # number of times each arm has been selected
    sum_rewards::Vector{Float64}    # total reward earned from each arm
    average::Vector{Float64}        # sum_rewards / counts, refreshed for the arm just pulled
    r::Float64                      # reference level subtracted from rewards in `calc_value`
    gamma::Float64                  # stored only; the MYRS methods in this file never read it
    alpha_r::Float64                # step size used to move `r` toward the latest reward
    n::Float64                      # stored only; the MYRS methods in this file never read it
    opt::Bool                       # true: `r` is derived from `env.arm_pros` and not adapted
    test_name::String               # "gamma" selects the incremental update in `calc_value`

    # Keyword-only constructor. The vector fields are concretely typed as
    # Vector{Float64} because that is exactly what `zeros(env.arm_num)` yields.
    function MYRS(;env::Environment, r::Float64, gamma::Float64, alpha_r::Float64, n=0.,opt=false, test_name="")
        arm_zeros() = zeros(env.arm_num)   # fresh vector per field, never shared
        return new(env,
                   arm_zeros(),
                   arm_zeros(),
                   arm_zeros(),
                   arm_zeros(),
                   r,
                   gamma,
                   alpha_r,
                   n,
                   opt,
                   test_name)
    end
end

In [37]:
"""
    init!(algo::MYRS)

Reset `algo` via `init_algo!`, then set the starting reference level `algo.r`.

When `algo.opt` is set, `r` becomes the midpoint of the two largest entries of
`algo.env.arm_pros`; otherwise it starts at `1.`.
"""
function init!(algo::MYRS)
    init_algo!(algo)
    if !algo.opt
        return algo.r = 1.
    end
    ranked = sort(algo.env.arm_pros, rev=true)
    return algo.r = (ranked[1] + ranked[2]) / 2
end

# Pick the next arm for a MYRS agent by delegating to `greedy`
# (per the original note: the index of the maximum action value).
select_arm(algo::MYRS) = greedy(algo)

"""
    calc_value(algo::MYRS, selected, reward)

Refresh the statistics of arm `selected` after it paid out `reward`.

Steps, in order:
1. recompute the arm's running average from `sum_rewards` and `counts`;
2. unless `algo.opt` is set, move `algo.r` toward `reward` with step `alpha_r`;
3. update the arm's action value: incrementally by `reward - r` when
   `test_name == "gamma"`, otherwise as `counts * (average - r)`.

`algo.gamma` is not used here.
"""
function calc_value(algo::MYRS, selected, reward)
    pulls = algo.counts[selected]
    algo.average[selected] = algo.sum_rewards[selected] / pulls

    # A fixed (opt) reference level is left alone; otherwise it tracks rewards.
    algo.opt || (algo.r += algo.alpha_r * (reward - algo.r))

    if algo.test_name != "gamma"
        algo.actionValues[selected] = pulls * (algo.average[selected] - algo.r)
    else
        algo.actionValues[selected] += reward - algo.r
    end
end

# Play one round with a MYRS agent: choose an arm, sample its reward, update the
# per-arm statistics and action value, and report the outcome.
# Returns (selected arm, regret of that choice, sampled reward), where regret is
# `env.max_pro` minus the chosen arm's entry in `env.arm_pros`.
function update!(algo::MYRS)
    arm = select_arm(algo)
    payoff = get_reward(algo.env.arm_pros, arm)

    # Book-keeping must precede calc_value, which divides by the arm's count.
    algo.counts[arm] += 1
    algo.sum_rewards[arm] += payoff

    calc_value(algo, arm, payoff)

    return arm, algo.env.max_pro - algo.env.arm_pros[arm], payoff
end

update! (generic function with 5 methods)

In [41]:
"""
    simulation(; sim_num, steps, update_per, arm_num=4, dynamic=false)

Run every configured algorithm for `sim_num` independent runs of `steps` rounds
each and average the results over the runs.

# Keywords
- `sim_num::Int`: number of independent runs per algorithm.
- `steps::Int`: number of rounds per run.
- `update_per::Int`: with `dynamic=true`, the environment is redrawn after every
  `update_per` rounds. Must be positive.
- `arm_num=4`: number of arms of the environment.
- `dynamic=false`: whether the arm probabilities change during a run.

# Returns
`(win_means, regret_means, algo_dict)`. Both matrices are `steps × n_algorithms`;
column `k` belongs to the `k`-th entry of `values(algo_dict)`. `win_means` is the
fraction of runs that picked `env.correct_arm` at each step, `regret_means` the
mean cumulative regret. If `dynamic` is set and `steps` is not a multiple of
`update_per` (with `update_per <= steps`), a message is printed and `nothing` is
returned.

# Throws
- `ArgumentError` if `update_per` is not positive.
"""
function simulation(;sim_num::Int, steps::Int, update_per::Int, arm_num=4, dynamic=false)
    #argument checking.
    update_per > 0 || throw(ArgumentError("update_per must be positive, got $update_per"))
    if dynamic && update_per <= steps && steps % update_per != 0
        println("update number error.")
        return
    end

    # One row of arm probabilities per environment phase. At least one row is
    # always required for the initial environment, even if update_per > steps.
    update_num = max(1, div(steps, update_per))
    env = Environment(arm_num)

    algo_dict = Dict{String,Any}()
    for alpha in [0.0005]
        gamma = 1.
        algo_dict["RS \\alpha=$alpha"] = MYRS(env=env, r=1., gamma=gamma, alpha_r=alpha, test_name="gamma")
    end
    algo_dict["RS opt \\gamma"] = MYRS(env=env, r=1., gamma=0.999, alpha_r=0.0005, opt=true, test_name="gamma")
    algo_dict["RS"] = RS(env)

    regret_means, win_means = Vector{Float64}[], Vector{Float64}[]
    for algorithm in values(algo_dict)
        regrets, wins = zeros(sim_num, steps), zeros(sim_num, steps)
        for sim in 1:sim_num
            # Fresh arm probabilities are drawn for every run of every algorithm.
            ds = rand(update_num, arm_num)
            update_env!(env, ds[1, :])
            init!(algorithm)
            regret = 0.

            # Run all `steps` rounds so the final column of the result is filled
            # (stopping one round early left it at zero).
            for step in 1:steps
                selected, rgt, _ = update!(algorithm)

                #save each parameter.
                regret += rgt
                regrets[sim, step] = regret

                if selected == env.correct_arm
                    wins[sim, step] = 1
                end

                # Switch to the next environment phase. Skipped after the very
                # last round: no further phase exists in `ds`.
                if dynamic && step < steps && step % update_per == 0
                    update_env!(env, ds[div(step, update_per) + 1, :])
                    # Algorithms whose reference level depends on the arm
                    # probabilities get it refreshed for the new environment.
                    if algorithm isa RS
                        update_r!(algorithm)
                    elseif algorithm isa LSX && algorithm.opt
                        algorithm.r = opt_r(algorithm.env.arm_pros)
                    elseif algorithm isa MYRS && algorithm.opt
                        sorted_pro = sort(env.arm_pros, rev=true)
                        algorithm.r = (sorted_pro[1] + sorted_pro[2]) / 2
                    end
                end
            end
            if sim % 10 == 0
                print("$((sim/sim_num)*100) %")
            end
        end
        # Average over runs, one value per step.
        push!(regret_means, [mean(view(regrets, :, i)) for i in 1:steps])
        push!(win_means, [mean(view(wins, :, i)) for i in 1:steps])
    end

    println("DONE.")

    graph_data = hcat(win_means...)
    graph_data2 = hcat(regret_means...)

    return graph_data, graph_data2, algo_dict
end

simulation (generic function with 1 method)

In [None]:
# Run the experiment: 1000 runs of 30000 rounds on a static 20-armed environment.
# g1 = mean accuracy per step, g2 = mean cumulative regret per step.
# `@time` reports the elapsed time; the trailing `;` hides the cell's echo.
@time g1, g2, algo_dict = simulation(
    sim_num=1000, steps=30000, update_per=10000, arm_num=20, dynamic=false
);

1.0 %2.0 %3.0 %4.0 %5.0 %6.0 %7.000000000000001 %8.0 %9.0 %10.0 %11.0 %12.0 %13.0 %14.000000000000002 %15.0 %16.0 %17.0 %18.0 %19.0 %20.0 %21.0 %22.0 %23.0 %24.0 %25.0 %26.0 %27.0 %28.000000000000004 %28.999999999999996 %30.0 %31.0 %32.0 %33.0 %34.0 %35.0 %36.0 %37.0 %38.0 %39.0 %40.0 %41.0 %42.0 %43.0 %44.0 %45.0 %46.0 %47.0 %48.0 %49.0 %50.0 %51.0 %52.0 %53.0 %54.0 %55.00000000000001 %56.00000000000001 %56.99999999999999 %57.99999999999999 %59.0 %60.0 %61.0 %62.0 %63.0 %64.0 %65.0 %66.0 %67.0 %68.0 %69.0 %70.0 %71.0 %72.0 %73.0 %74.0 %75.0 %76.0 %77.0 %78.0 %79.0 %80.0 %81.0 %82.0 %83.0 %84.0 %85.0 %86.0 %87.0 %88.0 %89.0 %90.0 %91.0 %92.0 %93.0 %94.0 %95.0 %96.0 %97.0 %98.0 %99.0 %100.0 %1.0 %2.0 %3.0 %4.0 %5.0 %6.0 %7.000000000000001 %8.0 %9.0 %10.0 %11.0 %12.0 %13.0 %14.000000000000002 %15.0 %16.0 %17.0 %18.0 %19.0 %20.0 %21.0 %22.0 %23.0 %24.0 %25.0 %26.0 %27.0 %28.000000000000004 %28.999999999999996 %30.0 %31.0 %32.0 %33.0 %34.0 %35.0 %36.0 %37.0 %38.0 %39.0 %40.0 %41.0 %42.0 %4

In [None]:
# Thin the accuracy curves to every 100th step to keep the plot light.
step_axis = collect(1:100:size(g1, 1))
# Rows = sampled steps, columns = algorithms.
graph = g1[step_axis, :]
# Column k of g1 was produced by the k-th entry of the dict, so the keys are
# already in series order.
labels = [string(key) for key in keys(algo_dict)]
# Plots.jl assigns matrix *columns* to series, so the labels are passed as a
# 1×n row; a plain Vector would be applied as a whole to every series.
plot(step_axis, graph, title="Accuracy", label=reshape(labels, 1, :),
     xlabel="step", ylabel="accuracy", legend=:bottomright)