## NOMS Paper (5.1)

In [1]:
import Pkg; Pkg.activate(".")

[32m[1m Activating[22m[39m environment at `~/thesis-sandbox/ParsimoniousMonitoring/notebooks/Project.toml`


In [2]:
using Distributions
using HMMBase
using JSON
using ParsimoniousMonitoring
using ProgressMeter

In [3]:
using POMDPs
using POMDPModelTools
using DiscreteValueIteration

In [4]:
MRE(baseline, candidate) = mean((candidate .- baseline) ./ baseline);

In [5]:
"""
    synthetic_mdp(β, τmax)

Build a `MonitoringMDP` over two identical two-state HMMs.

Each HMM uses the transition matrix `[β 1-β; 1-β β]` and the observation
distributions `Constant(0.01)` and `Constant(100)`. Both entries of the second
`MonitoringMDP` argument are set to `τmax`.
"""
function synthetic_mdp(β, τmax)
    transition = [β 1 - β; 1 - β β]
    emissions = [Constant(0.01), Constant(100)]
    # The same HMM instance is deliberately used for both monitored processes.
    process = HMM(transition, emissions)
    # NOTE(review): `[25, 25]` and `0.99` are presumably the per-process measurement
    # costs and the discount factor; confirm against the MonitoringMDP constructor.
    return MonitoringMDP([process, process], [τmax, τmax], [25, 25], 0.99)
end;

In [6]:
"""
    eval_policies(mdp, policies)

Compare candidate policies against a value-iteration baseline on `mdp`.

`policies` is iterated as `(name, factory)` pairs, where `factory(mdp)` builds the
policy to evaluate. Every policy (baseline included) is evaluated on all
`states(mdp)`, and the elapsed time of each evaluation is printed.

Returns a `Dict{String,Float64}` mapping each policy name to the `MRE` of its
state values relative to the baseline's.
"""
function eval_policies(mdp, policies)
    # Reference policy obtained by sparse value iteration on the tabular MDP.
    vi_solver = SparseValueIterationSolver(max_iterations = 5000, belres = 1e-6)
    tabular = SparseTabularMDP(mdp)
    reference_policy = solve_sparse(vi_solver, mdp, tabular)

    # Only the policy evaluation is timed, not the solve above.
    baseline, baseline_secs = @timed evaluate(mdp, reference_policy).(states(mdp))
    println("Baseline: $(baseline_secs)s")

    errors = Dict{String,Float64}()
    for (name, policy_fn) in policies
        candidate = policy_fn(mdp)
        vf, candidate_secs = @timed evaluate(mdp, candidate).(states(mdp))
        println("$(name): $(candidate_secs)s")
        errors[name] = MRE(baseline, vf)
    end

    return errors
end;

### Main

In [13]:
# Candidate policies keyed by display name; each value maps an MDP to a policy.
policies = let
    # Factory for a receding-horizon policy with the given second argument.
    receding(horizon) = mdp -> RecedingHorizonPolicy(mdp, horizon)

    Dict{String,Function}(
        "Greedy" => mdp -> GreedyPolicy(mdp),
        "RH-2" => receding(2),
        "RH-3" => receding(3),
        # "RH-4" => receding(4),  # disabled in this run
    )
end;

In [14]:
# Values of β to sweep: fine 0.01 steps near both ends of (0, 1), coarse 0.1 steps
# in between.
βs = vcat(0.01:0.01:0.05, 0.1:0.1:0.9, 0.95:0.01:0.99);
# One result dictionary (policy name => mean relative error) per β. The element
# type is concrete and matches what `eval_policies` returns, instead of the
# abstract `Dict`; slots are uninitialised until the sweep fills them.
results = Vector{Dict{String,Float64}}(undef, length(βs));

In [16]:
# Evaluate all policies for every β in parallel. Each iteration writes only to its
# own slot of `results`, so iterations do not touch each other's data; the timing
# lines printed by `eval_policies` may interleave across threads.
@time Threads.@threads for idx in eachindex(βs)
    results[idx] = eval_policies(synthetic_mdp(βs[idx], 100), policies)
end

Baseline: 0.930965837s
Baseline: 1.120300826s
Baseline: 1.126520803s
Baseline: 2.743909355s
Baseline: 3.049894853s
Baseline: 3.242578989s
Baseline: 3.370767557s
Baseline: 3.373283181s
Greedy: 18.251251713s
Greedy: 18.091533499s
Greedy: 18.921235214s
Greedy: 23.739339641s
Greedy: 23.774789583s
Greedy: 23.982895254s
Greedy: 23.945142225s
Greedy: 24.586533794s
RH-2: 146.232203737s
RH-2: 149.493289629s
RH-2: 150.103817107s
RH-2: 176.494557037s
RH-2: 176.942993401s
RH-2: 178.796621664s
RH-2: 180.071152424s
RH-2: 184.212603232s
RH-3: 500.19712602s
RH-3: 511.36354961s
RH-3: 511.093181759s
Baseline: 2.760275237s
Greedy: 11.778632832s
Baseline: 2.560750996s
Baseline: 2.679005295s
Greedy: 18.386913431s
Greedy: 18.283494214s
RH-3: 601.344229748s
RH-3: 602.408970529s
RH-3: 607.179400669s
RH-3: 611.095200954s
RH-3: 616.316825584s
Baseline: 1.7170648s
Baseline: 2.347500293s
RH-2: 133.418183535s
Baseline: 3.077304218s
Baseline: 3.406083001s
Greedy: 17.119973017s
Greedy: 17.83256699s
RH-2: 131.1226477