## JONS Paper

In [None]:
import Pkg; Pkg.activate(".")

In [None]:
using Revise

In [None]:
using ArgCheck
using Distributions
using HMMBase
using ParsimoniousMonitoring
using PyPlot
using Random

In [None]:
using POMDPs
using POMDPModelTools
using POMDPSimulators
using DiscreteValueIteration

In [None]:
# TODO: Implement only one route in receding horizon

### 8.1 A first simple example

In [None]:
"""
    constdist(x)

Return a discrete distribution whose support is the single value `x`,
taken with probability one.
"""
function constdist(x)
    support = [x]
    weights = [1.0]
    return DiscreteNonParametric(support, weights)
end

In [None]:
# Deterministic path: 1x1 transition matrix, one constant observation
# distribution located at 8.0.
p1 = HMM(ones(1,1), [constdist(8.0)])
# Stochastic path: 2x2 transition matrix, with constant observation
# distributions located at 5.0 and 10.0.
p2 = HMM([0.99 0.01; 0.02 0.98], [constdist(5.0), constdist(10.0)]);
# τmax = 150, c = 0.65
mdp = MonitoringMDP([p1, p2], [150, 150], [0, 0.65]);

In [None]:
fig, ax = subplots(figsize = (10, 3))
ax.plot(rand(p1, 3000), label="Deterministic path")
ax.plot(rand(p2, 3000), label="Stochastic path")
ax.set_xlabel("Timestep")
ax.set_ylabel(L"$L(t)$")
ax.set_ylim(4, 12)
ax.legend();

#### Greedy policy

In [None]:
"""
    thresholds(mdp::MonitoringMDP{2})

Return a pair `(lower, upper)` computed from the cost `mdp.costs[2]`, the mean
of the single observation distribution of `mdp.models[1]`, and the means of the
two observation distributions of `mdp.models[2]`.

The first model must have exactly one state and the second exactly two;
otherwise the `@argcheck` guards throw.

NOTE(review): presumably these are the belief thresholds of the greedy policy
for the stochastic link — confirm against the paper.
"""
function thresholds(mdp::MonitoringMDP{2})
    @argcheck size(mdp.models[1], 1) == 1 # Deterministic link
    @argcheck size(mdp.models[2], 1) == 2 # Stochastic link
    cost = mdp.costs[2]
    fixed = mean(mdp.models[1].B[1])
    low, high = map(mean, mdp.models[2].B)
    lower = cost / (fixed - low)
    upper = 1 - cost / (high - fixed)
    return lower, upper
end;

In [None]:
xmin, xmax = thresholds(mdp)

In [None]:
# hr = HistoryRecorder(max_steps = 3000, show_progress = true)
# s0 = rand(states(mdp))
# h_greedy = simulate(hr, mdp, GreedyPolicy(mdp), s0)
# # h_always = simulate(hr, mdp, always_measure_policy(P), s0);
# # h_never = simulate(hr, mdp, ConstantPolicy((false,false)), s0);
# # h_mdp = simulate(hr, mdp, policy, s0);
# # h_rh = simulate(hr, mdp,  OnlineRecedingHorizonPolicy(mdp, 4), s0);

In [None]:
# predictor = map(h_greedy) do history
#     state = history.s[2]
#     (mdp.models[2].A^(state.timesteps+1))[state.laststate,1]
# end;

In [None]:
# fig, ax = subplots(figsize = (10, 3))
# ax.plot(predictor)
# ax.axhline(xmin, c = "black", ls = "--", lw = 1.0, label = "xmin")
# ax.axhline(xmax, c = "black", ls = "--", lw = 1.0, label = "xmax")
# ax.set_xlabel("Timestep")
# ax.set_ylabel(L"γ_{t-1,t}(1)")
# ax.set_ylim(0, 1.0)
# ax.legend(loc = "upper right");

In [None]:
# logbook = benchmark(mdp, GreedyPolicy(mdp), hcat(rand(mdp.models[1], 3000), rand(mdp.models[2], 3000)));
logbook = benchmark(mdp, RecedingHorizonPolicy(mdp, 4), hcat(rand(mdp.models[1], 3000), rand(mdp.models[2], 3000)), show_progress = true);

In [None]:
# For each logbook entry, take the second component of the recorded state
# `history.s` and read entry [laststate, 1] of the second model's transition
# matrix raised to the power `timesteps + 1`.
# NOTE(review): presumably the predicted probability that the stochastic path
# is in its first state at the next step — confirm.
predictor = map(logbook) do history
    state = history.s[2]
    (mdp.models[2].A^(state.timesteps+1))[state.laststate,1]
end;

In [None]:
?@timed

In [None]:
# Plot `predictor` per timestep, with the `xmin` / `xmax` thresholds drawn as
# dashed horizontal lines.
fig, ax = subplots(figsize = (10, 3))
ax.plot(predictor)
ax.axhline(xmin, c = "black", ls = "--", lw = 1.0, label = "xmin")
ax.axhline(xmax, c = "black", ls = "--", lw = 1.0, label = "xmax")
ax.set_xlabel("Timestep")
ax.set_ylabel(L"γ_{t-1,t}(1)")
ax.set_ylim(0, 1.0)
ax.legend(loc = "upper right");

In [None]:
using DataFrames
fig, ax = subplots(figsize = (10, 3))
ax.plot(DataFrame(logbook).delay)
ax.plot(DataFrame(logbook).delay_opt)

In [None]:
logbook = benchmark(mdp, GreedyPolicy(mdp), hcat(rand(mdp.models[1], 3000), rand(mdp.models[2], 3000)))

In [None]:
h_greedy[1]

In [None]:
Random.seed!(2020)

In [None]:

p1 = HMM(ones(1,1), [Normal(8,0)])
p2 = HMM([0.99 0.01; 0.02 0.98], [Normal(5,0), Normal(10,0)])
mdp = MonitoringMDP([p1, p2], [150, 150], [0.65, 0.65]);
@time smdp = SparseTabularMDP(mdp);

In [None]:
solver = SparseValueIterationSolver(max_iterations=1000, belres=1e-6)
policy = solve_sparse(solver, mdp, smdp, 0.99);

In [None]:
# In this case the belief space is a line [0,1] which represents
# the probability of the stochastic path being in state 1.
# NOTE(review): a function that is also named `belief_1d` is defined in a
# later cell; within one session the two bindings conflict — consider renaming.
belief_1d = map(states(mdp)) do state
    (mdp.models[2].A^state[2].timesteps)[state[2].laststate,1]
end;

# Order the policy by belief values, and find the thresholds
perm = sortperm(belief_1d)
sorted_belief, sorted_policy = belief_1d[perm], policy.policy[perm]
# Belief values at which the sorted policy differs from the previous entry
# (`.+ 1` selects the element just after each change).
sorted_belief[findall(sorted_policy[2:end] .!= sorted_policy[1:end-1]) .+ 1]

In [None]:
"""
    thresholds(mdp::MonitoringMDP{2})

Compute the pair `(c / (l - l0), 1 - c / (l1 - l))`, where `c` is
`mdp.costs[2]`, `l` is the mean of the only observation distribution of the
first model, and `l0`, `l1` are the means of the two observation distributions
of the second model.

Throws (through `@argcheck`) unless the first model has one state and the
second has two.
"""
function thresholds(mdp::MonitoringMDP{2})
    @argcheck size(mdp.models[1], 1) == 1 # Deterministic link
    @argcheck size(mdp.models[2], 1) == 2 # Stochastic link
    deterministic, stochastic = mdp.models[1], mdp.models[2]
    c = mdp.costs[2]
    l = mean(deterministic.B[1])
    l0, l1 = mean.(stochastic.B)
    gap_below, gap_above = l - l0, l1 - l
    return (c / gap_below, 1 - c / gap_above)
end

In [None]:
xmin, xmax = thresholds(mdp)

In [None]:
fig, ax = subplots(figsize = (3, 1.0))
ax.scatter(belief_1d, ones(length(belief_1d)), c = policy.policy, s = 1.0)
ax.axvline.([xmin, xmax], lw = 1.0);

### NOMS paper

Dire dans la thèse qu'on peut appliquer RH en ligne puisqu'il n'est pas nécessaire de visiter tous les états.

In [None]:
using ParsimoniousMonitoring: OnlineRecedingHorizonPolicy

In [None]:
hr = HistoryRecorder(max_steps = 3000, show_progress = true)
s0 = rand(states(mdp))
h_always = simulate(hr, mdp, ConstantPolicy((false,true)), s0);
h_never = simulate(hr, mdp, ConstantPolicy((false,false)), s0);
h_mdp = simulate(hr, mdp, policy, s0);
h_rh = simulate(hr, mdp,  OnlineRecedingHorizonPolicy(mdp, 4), s0);

In [None]:
figure(figsize=(4,4))
plot(cumsum(map(x -> x[:r], h_always.hist)))
plot(cumsum(map(x -> x[:r], h_never.hist)))
plot(cumsum(map(x -> x[:r], h_mdp.hist)))
plot(cumsum(map(x -> x[:r], h_rh.hist)))

In [None]:
# function belief_1d(mdp::MonitoringMDP, p::Int, k::Int)
#     states_ = states(mdp)
#     belief = Vector{Float64}(undef, length(states_))
#     model = mdp.models[p]
#     for (i, state) in enumerate(states_)
#         belief[i] = (model.A^state[p].timesteps)[state[p].laststate,k]
#     end
#     belief
# end

In [None]:
belief = belief_1d(mdp, 2, 1)
fig, ax = subplots(figsize = (3, 1.0))
ax.scatter(belief, ones(length(belief)), c = policy.policy, s = 1.0)

In [None]:
hr = HistoryRecorder(max_steps=3000)
s0 = rand(states(mdp))
h_always = simulate(hr, mdp, ConstantPolicy((false,true)), s0);
h_never = simulate(hr, mdp, ConstantPolicy((false,false)), s0);
h_mdp = simulate(hr, mdp, policy, s0);

In [None]:
plot(cumsum(map(x -> x[:r], h_always.hist)))
plot(cumsum(map(x -> x[:r], h_never.hist)))
plot(cumsum(map(x -> x[:r], h_mdp.hist)))

In [None]:
rand(states(mdp))

In [None]:
"""
    belief_1d(mdp::MonitoringMDP, p::Int, k::Int)

Return a `Vector{Float64}` with one entry per element of `states(mdp)`.

For each state, the pair `(timesteps, laststate)` is read from
`getstate(state)[p]`, and the entry is `(A^timesteps)[laststate, k]`, where `A`
is the transition matrix of `mdp.models[p]`.

NOTE(review): presumably the probability that path `p` is in hidden state `k`
given its last observation — confirm.
"""
function belief_1d(mdp::MonitoringMDP, p::Int, k::Int)
    allstates = states(mdp)
    probabilities = Vector{Float64}(undef, length(allstates))
    transition = mdp.models[p].A
    for (idx, s) in enumerate(allstates)
        elapsed, observed = getstate(s)[p]
        probabilities[idx] = (transition^elapsed)[observed, k]
    end
    return probabilities
end

In [None]:
# TODO: Plot value function

In [None]:
belief = belief_1d(mdp, 2, 1)
fig, ax = subplots(figsize = (3, 1.0))
ax.scatter(belief, ones(length(belief)), c = res.policy, s = 1.0)

In [None]:
"""
    ConstantPolicy(action::CartesianIndex)

Policy that ignores the state and always returns the stored `action`.

The index dimensionality `N` is a type parameter so that the `action` field is
concretely typed; `ConstantPolicy(CartesianIndex(0, 1))` still works and infers
`N` automatically.
"""
struct ConstantPolicy{N} <: Policy
    action::CartesianIndex{N}
end

# The state argument is deliberately unused: the action never depends on it.
POMDPs.action(policy::ConstantPolicy, _) = policy.action

In [None]:
"""
    MDPPolicy(mdp::MonitoringMDP, policy::Vector{Int})
    MDPPolicy(mdp::MonitoringMDP, policy::ValueIterationPolicy)

Tabular policy for `mdp`: `policy[i]` is the index, into `actions(mdp)`, of the
action to take in the state whose `stateindex` is `i`.

The second constructor copies the table from the `policy` field of a
`ValueIterationPolicy`.
"""
struct MDPPolicy <: Policy
    mdp::MonitoringMDP
    policy::Vector{Int}
end

function MDPPolicy(mdp::MonitoringMDP, policy::ValueIterationPolicy)
    return MDPPolicy(mdp, policy.policy)
end

"""
    POMDPs.action(policy::MDPPolicy, s)

Look up the action stored for state `s` in the policy table.
"""
function POMDPs.action(policy::MDPPolicy, s)
    # Use the MDP stored in the policy, not a global `mdp`, so the lookup is
    # always consistent with the table it was built for.
    model = policy.mdp
    action_index = policy.policy[stateindex(model, s)]
    return actions(model)[action_index]
end

In [None]:
# pol = ConstantPolicy(CartesianIndex(1,1))
# pol = MDPPolicy(mdp, res);

In [None]:
# rs = RolloutSimulator(max_steps=10)
# r = simulate(rs, mdp, pol, rand(mdp.states))

In [None]:
s0 = rand(mdp.states);
# s0 = CartesianIndex(0, 1, 0, 1);

In [None]:
hr = HistoryRecorder(max_steps=3000)
h_always = simulate(hr, mdp, ConstantPolicy(CartesianIndex(0,1)), s0);
h_never = simulate(hr, mdp, ConstantPolicy(CartesianIndex(0,0)), s0);
h_mdp = simulate(hr, mdp, MDPPolicy(mdp, res), s0);

In [None]:
sum(map(x -> x[:a] == CartesianIndex(0,1), h_mdp.hist))

In [None]:
mean(map(x -> x[:r], h_mdp.hist))

In [None]:
plot(cumsum(map(x -> x[:r], h_always.hist)))
plot(cumsum(map(x -> x[:r], h_never.hist)))
plot(cumsum(map(x -> x[:r], h_mdp.hist)))

https://github.com/JuliaPOMDP/POMDPExamples.jl/blob/master/notebooks/Defining-a-Heuristic-Policy.ipynb

In [None]:
h_greedy[1]

In [None]:
# For every state index `i` of the solved policy, collect in `x` the entry
# [laststate, 1] of `p2.A^timesteps` for the second path, and in `y` the second
# component of the decoded action, then scatter one against the other.
x, y = [], []
for (i, action) in enumerate(res.policy)
    # `action` is rebound from the stored action index to the decoded action.
    action = getaction(actions(mdp)[action])
    timesteps, laststate = getstate(states(mdp)[i])[2]
    push!(x, (p2.A^timesteps)[laststate,1])
    push!(y, action[2])
end
scatter(x, y)

In [None]:
# Same computation as the previous cell: `x` holds entry [laststate, 1] of
# `p2.A^timesteps` for each state, `y` the second component of the decoded
# action chosen by the policy in that state.
x, y = [], []
for (i, action) in enumerate(res.policy)
    # `action` is rebound from the stored action index to the decoded action.
    action = getaction(actions(mdp)[action])
    timesteps, laststate = getstate(states(mdp)[i])[2]
    push!(x, (p2.A^timesteps)[laststate,1])
    push!(y, action[2])
end
scatter(x, y)

#### 8.2 Two Markov chains of two states each

In [None]:
# TODO: Use DiscreteNonParametric instead of zero-variance Normal distributions.
p1 = HMM([0.7 0.3; 0.3 0.7], [Normal(0.5, 0), Normal(2.0, 0)])
p2 = HMM([0.9 0.1; 0.1 0.9], [Normal(1.0,0), Normal(3.0,0)])
mdp = MonitoringMDP([100, 100], [p1, p2], [0.05, 0.15], 0.01);

In [None]:
smdp = SparseTabularMDP(mdp);

In [None]:
solver = SparseValueIterationSolver(max_iterations=100, belres=1e-6, verbose=true)
res = solve(solver, smdp);

In [None]:
# For every state index `i` of the solved policy, record in `x` and `y` the
# entry [laststate, 1] of `A^(timesteps + 1)` for paths 1 and 2 respectively,
# and in `z` the policy's action index for that state.
# NOTE(review): presumably `x` and `y` are the probabilities of each path being
# in its first state at the next step — confirm.
x, y, z = [], [], []
for (i, action) in enumerate(res.policy)
    state = getstate(states(mdp)[i])
    # `timesteps` and `laststate` are reused: first for path 1, then for path 2.
    timesteps, laststate = state[1]
    push!(x, (p1.A^(timesteps+1))[laststate,1])
    timesteps, laststate = state[2]
    push!(y, (p2.A^(timesteps+1))[laststate,1])
    push!(z, action)
end
# scatter(x, y)

In [None]:
scatter(x, y, c=z)
xlim(0,1)
ylim(0,1)

TODO: Implement https://juliapomdp.github.io/POMDPModelTools.jl/latest/visualization.html

## Simulation

https://juliapomdp.github.io/POMDPSimulators.jl/stable/parallel/#Parallel-1