In [1]:
using Plots, Statistics, NaNStatistics, LinearAlgebra, Serialization, ParticleFilters, StatsBase

include("../utils/import.jl")
using .Import
using ..Import: State

include("../utils/figs.jl")
using .Figs

# Experiments

## Conditions

**Active Teacher Selection (ATS):** Solve using an approximate POMDP solver (POMCPOW).

**Naive:** Solve by selecting actions randomly for a fixed number of timesteps, forming a frequentist estimate of the state, and then always selecting the arm with the highest estimated expected utility thereafter.

## Run Script

    experiment_scripts/run_exp_naive.sh:
    julia baselines_naive.jl 3 3 0.9 7 7 5 1000 289506
    julia baselines_naive.jl 3 3 0.9 7 7 5 1000 289513
    julia baselines_naive.jl 3 3 0.9 7 7 5 1000 292312
    julia baselines_naive.jl 3 3 0.9 7 7 5 1000 292319
    julia ApproximatePOMDP.jl 3 3 0.9 7 7 5 1000 289506
    julia ApproximatePOMDP.jl 3 3 0.9 7 7 5 1000 289513
    julia ApproximatePOMDP.jl 3 3 0.9 7 7 5 1000 292312
    julia ApproximatePOMDP.jl 3 3 0.9 7 7 5 1000 292319

# Plots
## Set Parameters
### Experiment Names

In [2]:
# Experiment IDs for each condition: a shared prefix joined with one suffix per experiment.
pomdp_expIDs = "221117_" .* ["145355", "150543", "151923", "153210"]
naive_expIDs = "base_naive_221128_" .* ["143922", "143940", "143959", "144017", "144036"];

### Experiment Parameters

In [3]:
# Number of runs requested from each experiment when importing its data.
runs = 10
# Action names handed to the action-proportion plots.
actions = ["C1", "C2", "C3", "B1", "B2", "B3"]
# Display names of the two conditions.
pomdp_name = "ATS"
naive_name = "Naive";

### Plotting Parameters

In [4]:
# Window size passed to the plotting helpers.
# NOTE(review): presumably a moving-average window in timesteps — confirm in figs.jl.
window = 100
# 1x2 row matrix holding the two condition names, in the order ATS, Naive.
labels = hcat(pomdp_name, naive_name);

## Import Data

In [5]:
# Load the results of every ATS experiment (takes ~3min).
# Each import sits in its own @time block so its duration is reported separately.
# NOTE(review): the last block reads pomdp_expIDs[5], but only four IDs are listed
# where pomdp_expIDs is defined — confirm whether a fifth experiment ID is missing.
@time begin
    println("importing exp=", string(pomdp_name), " 1 (exp ", pomdp_expIDs[1], ") data...")
    (s_p1, t_p1, a_p1, r_p1,
     beliefs_p1, final_states_p1, avg_belief_p1, avg_belief_u_p1) =
        Import.import_experiment(pomdp_expIDs[1], runs, "..")
end
@time begin
    println("importing exp=", string(pomdp_name), " 2 (exp ", pomdp_expIDs[2], ") data...")
    (s_p2, t_p2, a_p2, r_p2,
     beliefs_p2, final_states_p2, avg_belief_p2, avg_belief_u_p2) =
        Import.import_experiment(pomdp_expIDs[2], runs, "..")
end
@time begin
    println("importing exp=", string(pomdp_name), " 3 (exp ", pomdp_expIDs[3], ") data...")
    (s_p3, t_p3, a_p3, r_p3,
     beliefs_p3, final_states_p3, avg_belief_p3, avg_belief_u_p3) =
        Import.import_experiment(pomdp_expIDs[3], runs, "..")
end
@time begin
    println("importing exp=", string(pomdp_name), " 4 (exp ", pomdp_expIDs[4], ") data...")
    (s_p4, t_p4, a_p4, r_p4,
     beliefs_p4, final_states_p4, avg_belief_p4, avg_belief_u_p4) =
        Import.import_experiment(pomdp_expIDs[4], runs, "..")
end
@time begin
    println("importing exp=", string(pomdp_name), " 5 (exp ", pomdp_expIDs[5], ") data...")
    (s_p5, t_p5, a_p5, r_p5,
     beliefs_p5, final_states_p5, avg_belief_p5, avg_belief_u_p5) =
        Import.import_experiment(pomdp_expIDs[5], runs, "..")
end;

importing exp=ATS 1 (exp 221117_145355) data...


LoadError: SystemError: opening file "../sims/221117_145355_run6.txt": No such file or directory

In [6]:
# Load the results of every Naive experiment.
# Each import sits in its own @time block so its duration is reported separately.
@time begin
    println("importing exp=", string(naive_name), " 1 (exp ", naive_expIDs[1], ") data...")
    (s_n1, t_n1, a_n1, r_n1) =
        Import.get_star(naive_expIDs[1], runs, "..")
end
@time begin
    println("importing exp=", string(naive_name), " 2 (exp ", naive_expIDs[2], ") data...")
    (s_n2, t_n2, a_n2, r_n2) =
        Import.get_star(naive_expIDs[2], runs, "..")
end
@time begin
    println("importing exp=", string(naive_name), " 3 (exp ", naive_expIDs[3], ") data...")
    (s_n3, t_n3, a_n3, r_n3) =
        Import.get_star(naive_expIDs[3], runs, "..")
end
@time begin
    println("importing exp=", string(naive_name), " 4 (exp ", naive_expIDs[4], ") data...")
    (s_n4, t_n4, a_n4, r_n4) =
        Import.get_star(naive_expIDs[4], runs, "..")
end
@time begin
    println("importing exp=", string(naive_name), " 5 (exp ", naive_expIDs[5], ") data...")
    (s_n5, t_n5, a_n5, r_n5) =
        Import.get_star(naive_expIDs[5], runs, "..")
end;

importing exp=Naive 1 (exp base_naive_221117_145245) data...


LoadError: SystemError: opening file "../sims/base_naive_221117_145245_run6.txt": No such file or directory

In [7]:
# Combine data across the five experiments of each condition.
# Only the first experiment's t_* is kept.
# NOTE(review): assumes the t_* values are identical across experiments — confirm.
t_p = t_p1
# One representative state per experiment: element runs*i of the concatenated states.
# NOTE(review): assumes every experiment contributes exactly `runs` states — confirm.
s_p = let states = cat(s_p1, s_p2, s_p3, s_p4, s_p5, dims=1)
    [states[runs*i] for i in 1:5]
end
a_p = cat(a_p1, a_p2, a_p3, a_p4, a_p5, dims=1)
r_p = cat(r_p1, r_p2, r_p3, r_p4, r_p5, dims=1)

t_n = t_n1
s_n = let states = cat(s_n1, s_n2, s_n3, s_n4, s_n5, dims=1)
    [states[runs*i] for i in 1:5]
end
# The fifth entry must be the actions a_n5 (the states s_n5 were concatenated here before).
a_n = cat(a_n1, a_n2, a_n3, a_n4, a_n5, dims=1)
r_n = cat(r_n1, r_n2, r_n3, r_n4, r_n5, dims=1)
;

LoadError: UndefVarError: t_p1 not defined

## Analysis
### General

In [8]:
# Print a state: its `u` field, then each of the first three entries of `s.d`
# together with `dot(s.u, s.d[k])`, shown as "exp val".
function print_state(s::State)
    println("\t u: ", s.u)
    for k in 1:3
        # Label each line with its own index (all three used to be labelled "d1").
        println("\t d", k, ": ", s.d[k], "\t (exp val ", dot(s.u, s.d[k]), ")")
    end
end

print_state (generic function with 1 method)

In [9]:
# All runs under a given expID have the same start state & optimal action,
# so one state per experiment is reported for each condition.
for (cond_name, exp_ids, states) in (
    (pomdp_name, pomdp_expIDs, s_p),
    (naive_name, naive_expIDs, s_n),
)
    println("** "*cond_name*" condition **")
    for i in eachindex(exp_ids)
        optimal_action, highest_R = Import.get_optimal_arm(states[i])

        println("State "*string(i)*":")
        # Print the state of the condition being reported; the ATS loop
        # previously printed the Naive state s_n[i] here.
        print_state(states[i])
        println("Highest-Reward Arm:\t"*string(optimal_action)*" (R="*string(highest_R)*")\n")
    end
end

** ATS condition **


LoadError: UndefVarError: s_p not defined

In [10]:
# Plot the rewards of both conditions (ATS first, Naive second, matching `labels`)
# under the title "Average Reward".
# NOTE(review): `window` is presumably the averaging window — confirm in figs.jl.
Figs.plot_avg_r_multiple_experiments([r_p, r_n], window, labels, "Average Reward")

LoadError: UndefVarError: r_p not defined

In [11]:
# Scale each run's rewards by dot(u, d[1]) of that run's state, i.e. the expected
# reward of its first arm. By construction, the first arm is the highest-reward arm.
s_n_list = cat(s_n1, s_n2, s_n3, s_n4, s_n5, dims=1)
s_p_list = cat(s_p1, s_p2, s_p3, s_p4, s_p5, dims=1)

# Any[...] keeps the untyped element type that building the list with push! onto [] gives.
r_n_scaled = Any[
    r_n[k] ./ dot(s_n_list[k].u, s_n_list[k].d[1]) for k in eachindex(r_n)
]

r_p_scaled = Any[
    r_p[k] ./ dot(s_p_list[k].u, s_p_list[k].d[1]) for k in eachindex(r_p)
];

LoadError: UndefVarError: s_n1 not defined

In [12]:
# Plot the scaled rewards of both conditions (ATS first, Naive second, matching `labels`)
# under the title "Average Reward (scaled)".
Figs.plot_avg_r_multiple_experiments_normalized([r_p_scaled, r_n_scaled], window, labels, "Average Reward (scaled)")

LoadError: UndefVarError: r_p_scaled not defined

In [13]:
# Discount value passed to the cumulative-reward plot and shown in its title.
y = 0.999
Figs.plot_cumulative_avg_r_multiple_experiments(
    [r_p, r_n],
    y,
    labels,
    string("Cumulative Discounted (y=", y, ") Reward"),
)

LoadError: UndefVarError: r_p not defined

## Action Distributions

In [14]:
# Plot the proportion of each action taken in the ATS condition.
Figs.plot_proportion_actions_all(a_p, actions, window, string("exp=", pomdp_name, " Actions"))

LoadError: UndefVarError: a_p not defined

In [15]:
# Plot the proportion of each action taken in the Naive condition.
Figs.plot_proportion_actions_all(a_n, actions, window, string("exp=", naive_name, " Actions"))

LoadError: UndefVarError: a_n not defined