In [1]:
using Plots, Statistics, NaNStatistics, LinearAlgebra, Serialization, ParticleFilters, StatsBase

include("../utils/import.jl")
using .Import
using ..Import: State

include("../utils/figs.jl")
using .Figs

# Experiments

## Conditions

**Active Teacher Selection (ATS):** Solve using an approximate POMDP solver (POMCPOW).

**Naive:** Solve by selecting actions randomly for a fixed number of timesteps, forming a frequentist estimate of the state, and thereafter always selecting the arm with the highest estimated expected utility.

# Plots
## Set Parameters
### Experiment Names

In [2]:
# ATS: every experiment ID is the shared "22128_" prefix followed by its own suffix.
pomdp_expIDs = "22128_" .* [
    "051526", "055025", "06251", "07242", "075856",
    "083337", "09080", "100546", "10400", "113044",
    "120617", "130613", "13588", "143446", "15378",
    "161233", "164944", "174134", "184340", "192058",
]

# Naive: two groups of IDs that differ only in their prefix, joined into one list.
naive_expIDs1 = "base_naive_22127_194" .* ["857", "913", "929", "945"]
naive_expIDs2 = "base_naive_22127_195" .* [
    "01", "017", "033", "049",
    "15", "120", "136", "152",
    "28", "224", "240", "255",
    "311", "327", "343", "358",
]
naive_expIDs = vcat(naive_expIDs1, naive_expIDs2)
;

### Experiment Parameters

In [3]:
# Run count handed to the import helpers for every experiment ID.
runs = 25
# Action labels handed to the per-condition action-proportion plots.
actions = ["C1", "C2", "C3", "B1", "B2", "B3"]
# Display names of the two conditions.
pomdp_name = "ATS"
naive_name = "Naive";

### Plotting Parameters

In [4]:
# Window size handed to the Figs plotting helpers.
window = 100
# 1×2 row of series labels: ATS first, Naive second.
labels = hcat(pomdp_name, naive_name);

## Import Data

### Import ATS experiments

In [None]:
# Import ATS experiments 1-5. Each import is timed separately with @time and its
# results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", pomdp_name, " 1 (exp ", pomdp_expIDs[1], ") data...")
    (s_p1, t_p1, a_p1, r_p1, beliefs_p1, final_states_p1, avg_belief_p1, avg_belief_u_p1) =
        Import.import_experiment(pomdp_expIDs[1], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 2 (exp ", pomdp_expIDs[2], ") data...")
    (s_p2, t_p2, a_p2, r_p2, beliefs_p2, final_states_p2, avg_belief_p2, avg_belief_u_p2) =
        Import.import_experiment(pomdp_expIDs[2], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 3 (exp ", pomdp_expIDs[3], ") data...")
    (s_p3, t_p3, a_p3, r_p3, beliefs_p3, final_states_p3, avg_belief_p3, avg_belief_u_p3) =
        Import.import_experiment(pomdp_expIDs[3], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 4 (exp ", pomdp_expIDs[4], ") data...")
    (s_p4, t_p4, a_p4, r_p4, beliefs_p4, final_states_p4, avg_belief_p4, avg_belief_u_p4) =
        Import.import_experiment(pomdp_expIDs[4], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 5 (exp ", pomdp_expIDs[5], ") data...")
    (s_p5, t_p5, a_p5, r_p5, beliefs_p5, final_states_p5, avg_belief_p5, avg_belief_u_p5) =
        Import.import_experiment(pomdp_expIDs[5], runs, "..")
end
;

importing exp=ATS 1 (exp 22128_051526) data...
248.269985 seconds (1.99 G allocations: 76.554 GiB, 4.10% gc time, 0.36% compilation time)
importing exp=ATS 2 (exp 22128_055025) data...


In [None]:
# Import ATS experiments 6-10. Each import is timed separately with @time and its
# results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", pomdp_name, " 6 (exp ", pomdp_expIDs[6], ") data...")
    (s_p6, t_p6, a_p6, r_p6, beliefs_p6, final_states_p6, avg_belief_p6, avg_belief_u_p6) =
        Import.import_experiment(pomdp_expIDs[6], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 7 (exp ", pomdp_expIDs[7], ") data...")
    (s_p7, t_p7, a_p7, r_p7, beliefs_p7, final_states_p7, avg_belief_p7, avg_belief_u_p7) =
        Import.import_experiment(pomdp_expIDs[7], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 8 (exp ", pomdp_expIDs[8], ") data...")
    (s_p8, t_p8, a_p8, r_p8, beliefs_p8, final_states_p8, avg_belief_p8, avg_belief_u_p8) =
        Import.import_experiment(pomdp_expIDs[8], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 9 (exp ", pomdp_expIDs[9], ") data...")
    (s_p9, t_p9, a_p9, r_p9, beliefs_p9, final_states_p9, avg_belief_p9, avg_belief_u_p9) =
        Import.import_experiment(pomdp_expIDs[9], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 10 (exp ", pomdp_expIDs[10], ") data...")
    (s_p10, t_p10, a_p10, r_p10, beliefs_p10, final_states_p10, avg_belief_p10, avg_belief_u_p10) =
        Import.import_experiment(pomdp_expIDs[10], runs, "..")
end
;

In [None]:
# Import ATS experiments 11-15. Each import is timed separately with @time and its
# results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", pomdp_name, " 11 (exp ", pomdp_expIDs[11], ") data...")
    (s_p11, t_p11, a_p11, r_p11, beliefs_p11, final_states_p11, avg_belief_p11, avg_belief_u_p11) =
        Import.import_experiment(pomdp_expIDs[11], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 12 (exp ", pomdp_expIDs[12], ") data...")
    (s_p12, t_p12, a_p12, r_p12, beliefs_p12, final_states_p12, avg_belief_p12, avg_belief_u_p12) =
        Import.import_experiment(pomdp_expIDs[12], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 13 (exp ", pomdp_expIDs[13], ") data...")
    (s_p13, t_p13, a_p13, r_p13, beliefs_p13, final_states_p13, avg_belief_p13, avg_belief_u_p13) =
        Import.import_experiment(pomdp_expIDs[13], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 14 (exp ", pomdp_expIDs[14], ") data...")
    (s_p14, t_p14, a_p14, r_p14, beliefs_p14, final_states_p14, avg_belief_p14, avg_belief_u_p14) =
        Import.import_experiment(pomdp_expIDs[14], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 15 (exp ", pomdp_expIDs[15], ") data...")
    (s_p15, t_p15, a_p15, r_p15, beliefs_p15, final_states_p15, avg_belief_p15, avg_belief_u_p15) =
        Import.import_experiment(pomdp_expIDs[15], runs, "..")
end
;

In [None]:
# Import ATS experiments 16-20. Each import is timed separately with @time and its
# results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", pomdp_name, " 16 (exp ", pomdp_expIDs[16], ") data...")
    (s_p16, t_p16, a_p16, r_p16, beliefs_p16, final_states_p16, avg_belief_p16, avg_belief_u_p16) =
        Import.import_experiment(pomdp_expIDs[16], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 17 (exp ", pomdp_expIDs[17], ") data...")
    (s_p17, t_p17, a_p17, r_p17, beliefs_p17, final_states_p17, avg_belief_p17, avg_belief_u_p17) =
        Import.import_experiment(pomdp_expIDs[17], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 18 (exp ", pomdp_expIDs[18], ") data...")
    (s_p18, t_p18, a_p18, r_p18, beliefs_p18, final_states_p18, avg_belief_p18, avg_belief_u_p18) =
        Import.import_experiment(pomdp_expIDs[18], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 19 (exp ", pomdp_expIDs[19], ") data...")
    (s_p19, t_p19, a_p19, r_p19, beliefs_p19, final_states_p19, avg_belief_p19, avg_belief_u_p19) =
        Import.import_experiment(pomdp_expIDs[19], runs, "..")
end
@time begin
    println("importing exp=", pomdp_name, " 20 (exp ", pomdp_expIDs[20], ") data...")
    (s_p20, t_p20, a_p20, r_p20, beliefs_p20, final_states_p20, avg_belief_p20, avg_belief_u_p20) =
        Import.import_experiment(pomdp_expIDs[20], runs, "..")
end
;

### Import Naive experiments

In [None]:
# Import Naive experiments 1-5 via Import.get_star. Each import is timed separately
# and its four results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", naive_name, " 1 (exp ", naive_expIDs[1], ") data...")
    (s_n1, t_n1, a_n1, r_n1) = Import.get_star(naive_expIDs[1], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 2 (exp ", naive_expIDs[2], ") data...")
    (s_n2, t_n2, a_n2, r_n2) = Import.get_star(naive_expIDs[2], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 3 (exp ", naive_expIDs[3], ") data...")
    (s_n3, t_n3, a_n3, r_n3) = Import.get_star(naive_expIDs[3], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 4 (exp ", naive_expIDs[4], ") data...")
    (s_n4, t_n4, a_n4, r_n4) = Import.get_star(naive_expIDs[4], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 5 (exp ", naive_expIDs[5], ") data...")
    (s_n5, t_n5, a_n5, r_n5) = Import.get_star(naive_expIDs[5], runs, "..")
end
;

In [None]:
# Import Naive experiments 6-10 via Import.get_star. Each import is timed separately
# and its four results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", naive_name, " 6 (exp ", naive_expIDs[6], ") data...")
    (s_n6, t_n6, a_n6, r_n6) = Import.get_star(naive_expIDs[6], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 7 (exp ", naive_expIDs[7], ") data...")
    (s_n7, t_n7, a_n7, r_n7) = Import.get_star(naive_expIDs[7], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 8 (exp ", naive_expIDs[8], ") data...")
    (s_n8, t_n8, a_n8, r_n8) = Import.get_star(naive_expIDs[8], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 9 (exp ", naive_expIDs[9], ") data...")
    (s_n9, t_n9, a_n9, r_n9) = Import.get_star(naive_expIDs[9], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 10 (exp ", naive_expIDs[10], ") data...")
    (s_n10, t_n10, a_n10, r_n10) = Import.get_star(naive_expIDs[10], runs, "..")
end
;

In [None]:
# Import Naive experiments 11-15 via Import.get_star. Each import is timed separately
# and its four results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", naive_name, " 11 (exp ", naive_expIDs[11], ") data...")
    (s_n11, t_n11, a_n11, r_n11) = Import.get_star(naive_expIDs[11], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 12 (exp ", naive_expIDs[12], ") data...")
    (s_n12, t_n12, a_n12, r_n12) = Import.get_star(naive_expIDs[12], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 13 (exp ", naive_expIDs[13], ") data...")
    (s_n13, t_n13, a_n13, r_n13) = Import.get_star(naive_expIDs[13], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 14 (exp ", naive_expIDs[14], ") data...")
    (s_n14, t_n14, a_n14, r_n14) = Import.get_star(naive_expIDs[14], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 15 (exp ", naive_expIDs[15], ") data...")
    (s_n15, t_n15, a_n15, r_n15) = Import.get_star(naive_expIDs[15], runs, "..")
end
;

In [None]:
# Import Naive experiments 16-20 via Import.get_star. Each import is timed separately
# and its four results are bound to globals suffixed with the experiment number.
@time begin
    println("importing exp=", naive_name, " 16 (exp ", naive_expIDs[16], ") data...")
    (s_n16, t_n16, a_n16, r_n16) = Import.get_star(naive_expIDs[16], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 17 (exp ", naive_expIDs[17], ") data...")
    (s_n17, t_n17, a_n17, r_n17) = Import.get_star(naive_expIDs[17], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 18 (exp ", naive_expIDs[18], ") data...")
    (s_n18, t_n18, a_n18, r_n18) = Import.get_star(naive_expIDs[18], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 19 (exp ", naive_expIDs[19], ") data...")
    (s_n19, t_n19, a_n19, r_n19) = Import.get_star(naive_expIDs[19], runs, "..")
end
@time begin
    println("importing exp=", naive_name, " 20 (exp ", naive_expIDs[20], ") data...")
    (s_n20, t_n20, a_n20, r_n20) = Import.get_star(naive_expIDs[20], runs, "..")
end
;

In [None]:
# Combine data across experiments for each condition.

# --- ATS ---
# The time vector is taken from the first experiment only.
t_p = t_p1
# Keep one state per experiment: entry runs*i of the concatenated states, i = 1..20.
# The concatenation is built once here; it used to sit inside the comprehension and
# was therefore rebuilt on every iteration.
# NOTE(review): picking index runs*i assumes each s_pK holds `runs` entries — confirm.
s_p = let stacked = vcat(s_p1, s_p2, s_p3, s_p4, s_p5, s_p6, s_p7, s_p8, s_p9, s_p10,
                         s_p11, s_p12, s_p13, s_p14, s_p15, s_p16, s_p17, s_p18, s_p19, s_p20)
    [stacked[runs * i] for i in 1:20]
end
a_p = vcat(a_p1, a_p2, a_p3, a_p4, a_p5, a_p6, a_p7, a_p8, a_p9, a_p10,
           a_p11, a_p12, a_p13, a_p14, a_p15, a_p16, a_p17, a_p18, a_p19, a_p20)
r_p = vcat(r_p1, r_p2, r_p3, r_p4, r_p5, r_p6, r_p7, r_p8, r_p9, r_p10,
           r_p11, r_p12, r_p13, r_p14, r_p15, r_p16, r_p17, r_p18, r_p19, r_p20)

# --- Naive --- (same layout as the ATS section)
t_n = t_n1
s_n = let stacked = vcat(s_n1, s_n2, s_n3, s_n4, s_n5, s_n6, s_n7, s_n8, s_n9, s_n10,
                         s_n11, s_n12, s_n13, s_n14, s_n15, s_n16, s_n17, s_n18, s_n19, s_n20)
    [stacked[runs * i] for i in 1:20]
end
a_n = vcat(a_n1, a_n2, a_n3, a_n4, a_n5, a_n6, a_n7, a_n8, a_n9, a_n10,
           a_n11, a_n12, a_n13, a_n14, a_n15, a_n16, a_n17, a_n18, a_n19, a_n20)
r_n = vcat(r_n1, r_n2, r_n3, r_n4, r_n5, r_n6, r_n7, r_n8, r_n9, r_n10,
           r_n11, r_n12, r_n13, r_n14, r_n15, r_n16, r_n17, r_n18, r_n19, r_n20)
;

## Analysis
### General

In [None]:
"""
    print_state(s::State)

Print `s.u` to stdout, followed by the first three entries of `s.d`, each with its
dot product against `s.u` (shown as "exp val").
"""
function print_state(s::State)
    println("\t u: ", s.u)
    # Label each entry with its own index; all three lines used to be labelled "d1".
    for k in 1:3
        println("\t d", k, ": ", s.d[k], "\t (exp val ", dot(s.u, s.d[k]), ")")
    end
    return nothing
end

In [None]:
# All runs under a given expID have the same start state and optimal action, so a
# single state per experiment is reported for each condition.
println("** "*pomdp_name*" condition **")
for i in eachindex(pomdp_expIDs)
    optimal_action, highest_R = Import.get_optimal_arm(s_p[i])

    println("State "*string(i)*":")
    # Print the ATS state the optimal arm was computed from (this used to print s_n[i]).
    print_state(s_p[i])
    println("Highest-Reward Arm:\t"*string(optimal_action)*" (R="*string(highest_R)*")\n")
end
println("** "*naive_name*" condition **")
for i in eachindex(naive_expIDs)
    optimal_action, highest_R = Import.get_optimal_arm(s_n[i])

    println("State "*string(i)*":")
    print_state(s_n[i])
    println("Highest-Reward Arm:\t"*string(optimal_action)*" (R="*string(highest_R)*")\n")
end

In [None]:
# Reward plot comparing both conditions (ATS series first, Naive second).
Figs.plot_avg_r_multiple_experiments(
    [r_p, r_n],
    window,
    labels,
    "Average Reward",
)

In [None]:
# Value passed to the cumulative-reward plot and echoed in its title.
y = 0.999
Figs.plot_cumulative_avg_r_multiple_experiments(
    [r_p, r_n],
    y,
    labels,
    "Cumulative Discounted (y=$(y)) Reward",
)

## Action Distributions

In [None]:
# Plot for the action list ["C1"] across both conditions (ATS first, Naive second).
Figs.plot_actions_in_list_rolling_multiple_experiments(
    [a_p, a_n],
    ["C1"],
    window,
    labels,
    "Proportion Actions in ",
)

In [None]:
# Action-proportion plot over all action labels for the ATS condition.
Figs.plot_proportion_actions_all(
    a_p,
    actions,
    window,
    "exp=" * pomdp_name * " Actions",
)

In [None]:
# Action-proportion plot over all action labels for the Naive condition.
Figs.plot_proportion_actions_all(
    a_n,
    actions,
    window,
    "exp=" * naive_name * " Actions",
)

In [None]:
# Inspect one ATS experiment in detail. `i` must match the numeric suffix of the
# `final_states_p2` / `avg_belief_p2` variables used below.
i = 2
optimal_action, highest_R = Import.get_optimal_arm(s_p[i])

println("State "*string(i)*":")
# Print the ATS state being analysed (this used to print the Naive state s_n[i]).
print_state(s_p[i])
println("Highest-Reward Arm:\t"*string(optimal_action)*" (R="*string(highest_R)*")\n")

# Separate loop variable so the experiment index `i` above is not shadowed.
for k in 1:length(final_states_p2)
    println("\nConverged state"*string(k)*":")
    print_state(final_states_p2[k])
end
# Kept as the last expression so the notebook displays the figure.
plot(
    1:length(avg_belief_p2),
    avg_belief_p2,
    title = "belief on converged state",
    legend = :bottomright
)