In [1]:
import json
import numpy as np
from scipy.stats import mannwhitneyu, wilcoxon

# Load the sampled FQE values from JSON file

In [5]:
file_path = r"../reports/output_policy_utility/boostrapped_results_fixed_learned_policies.json"
with open(file_path, 'r') as f:
    data = json.load(f)


def _bootstrap_values(policy_key):
    """Return ``data["policies"][policy_key]["all_bootstrap_values"]`` as a float array.

    Routing every policy through this one helper keeps the dtype uniform;
    previously ``merged_vals`` was the only array built without ``dtype=float``.
    """
    return np.asarray(data["policies"][policy_key]["all_bootstrap_values"], dtype=float)


# Bootstrap values of the learned policies (JSON keys original_30 ... merged_120).
original_vals = _bootstrap_values("original_30")
synthetic_60_vals = _bootstrap_values("synthetic_60")
synthetic_100_vals = _bootstrap_values("synthetic_100")
merged_vals = _bootstrap_values("merged_120")

# Bootstrap values of the fixed-action policies (JSON keys fixed_action_0 ... fixed_action_3).
send_no_message = _bootstrap_values("fixed_action_0")
send_encouraging_message = _bootstrap_values("fixed_action_1")
send_informing_message = _bootstrap_values("fixed_action_2")
send_affirming_message = _bootstrap_values("fixed_action_3")


# Fixed-action policies, labelled by the message type each array is named after.
fixed_policies = dict(
    No_Message=send_no_message,
    Encouraging=send_encouraging_message,
    Informing=send_informing_message,
    Affirming=send_affirming_message,
)

# Learned policies, labelled by the dataset named in their JSON key.
learned_policies = dict(
    Original_30=original_vals,
    Synthetic_60=synthetic_60_vals,
    Synthetic_100=synthetic_100_vals,
    Merged_120=merged_vals,
)

# Every policy except Synthetic_100 (learned ones first, then the fixed ones):
# the comparison set iterated when Synthetic_100 is tested against the rest.
test_100 = {
    **{label: vals for label, vals in learned_policies.items() if label != "Synthetic_100"},
    **fixed_policies,
}

# Every policy except Merged_120, in the same learned-then-fixed order.
# NOTE(review): presumably the comparison set for Merged_120; not used in the visible cells.
test_merged = {
    **{label: vals for label, vals in learned_policies.items() if label != "Merged_120"},
    **fixed_policies,
}

# Learned policies other than the original-data one; these are the policies
# compared against original_vals.
synthetic_datasets = {
    label: vals for label, vals in learned_policies.items() if label != "Original_30"
}




# Test the hypothesis that the Synthetic_100 learned policy is better than each of the other learned policies and the fixed policies

In [6]:
# One-sided Wilcoxon signed-rank test of Synthetic_100 against every policy in
# test_100; alternative='greater' asks whether the Synthetic_100 values tend to
# be larger than the other policy's values.
# NOTE(review): wilcoxon treats the i-th entries of both arrays as a matched
# pair -- confirm the bootstrap draws are aligned across policies.
wilcoxon_results = {
    f"Synthetic_100 vs {name}": tuple(
        wilcoxon(synthetic_100_vals, vals, alternative='greater')
    )
    for name, vals in test_100.items()
}

# Parallel lists in the same order as the dict: comparison labels and p-values.
comparisons = list(wilcoxon_results)
p_values = [pair[1] for pair in wilcoxon_results.values()]


In [7]:
# Display the {comparison label: (statistic, p-value)} results for Synthetic_100.
wilcoxon_results

In [12]:
# Test the synthetic-data policies against the original-data policy.
# Results map a comparison label to (statistic, p-value); the two lists hold
# the p-values and labels in the same order.
wilcoxon_results_merged = {}
p_values_merged = []
comparisons_merged = []

# NOTE(review): wilcoxon treats the i-th entries of both arrays as a matched
# pair -- confirm the bootstrap draws are aligned across policies.
for policy_name, policy_vals in synthetic_datasets.items():
    # One-sided test: are this policy's values larger than the original's?
    stat, p = wilcoxon(policy_vals, original_vals, alternative='greater')
    # Label previously read "<name>vs Orignal Dataset" (missing space, typo).
    comp = f"{policy_name} vs Original Dataset"
    wilcoxon_results_merged[comp] = (stat, p)
    p_values_merged.append(p)
    comparisons_merged.append(comp)

In [13]:
# Display the {comparison label: (statistic, p-value)} results of the synthetic-vs-original tests.
wilcoxon_results_merged