In [7]:
import json
from pathlib import Path

In [8]:
# Directory (relative to the notebook's working directory) holding the JSON
# result files that are compared below.
RESULTS_DIR = Path('experiment_results')

# The baseline run plays the role of the null hypothesis; the "_test" run is
# the alternative it is compared against.
null_hypothesis_name = 'difficult_location_1_results_baseline.json'
alternative_hypothesis_name = 'difficult_location_1_results_baseline_test.json'


def load_results(file_name):
    """Parse one JSON results file located in ``RESULTS_DIR``.

    The file is decoded explicitly as UTF-8 so that parsing does not depend on
    the platform's default locale encoding.

    Args:
        file_name: Name of the file inside ``RESULTS_DIR``.

    Returns:
        The decoded JSON document.
    """
    return json.loads((RESULTS_DIR / file_name).read_text(encoding='utf-8'))


null_hypothesis = load_results(null_hypothesis_name)
alternative_hypothesis = load_results(alternative_hypothesis_name)

In [9]:
critical_value = 1.96

def test_hypothesis(null_mean, null_std, alternative_mean, n):
    """Test hypothesis."""
    t = (null_mean - alternative_mean) / (null_std / n ** 0.5)

    # If t is greater than cirtical value, we reject the null hypothesis.
    if t > critical_value:
        print("Reject null hypothesis")
        return 1

    if t < -critical_value:
        print("Reject null hypothesis. Alternative is significantly worse!")
        return -1

    print("Accept null hypothesis")
    return 0


In [10]:
# Sample size passed to the significance test for every experiment.
# NOTE(review): assumed to be the number of runs behind each mean/std entry --
# confirm against how the result files were produced.
N_RUNS = 5

# Experiments are paired by position, so both result lists must line up;
# zip() alone would silently drop the unmatched tail.
if len(null_hypothesis) != len(alternative_hypothesis):
    raise ValueError(
        "Result files contain a different number of experiments: "
        f"{len(null_hypothesis)} vs {len(alternative_hypothesis)}"
    )

total_points = 0         # sum of the per-experiment verdicts (+1 / 0 / -1)
more_peaks_detected = 0  # cumulative reduction in missed positive peaks

for i, (null, alternative) in enumerate(
        zip(null_hypothesis, alternative_hypothesis)
):
    print('=== Experiment', i)
    test_rmae = test_hypothesis(
        null_mean=null["mean_rmae"],
        null_std=null["std_rmae"],
        alternative_mean=alternative["mean_rmae"],
        n=N_RUNS,
    )

    total_points += test_rmae
    # Positive contribution when the alternative missed fewer peaks than the null.
    more_peaks_detected += null['avg_missed_pos_peaks'] - alternative['avg_missed_pos_peaks']
    # This prints the running total so far, not this experiment's own delta.
    print(f"More peaks detected: {more_peaks_detected}")

print('========== Result')
print(f"Total points: {total_points}")
print(f"More peaks detected: {more_peaks_detected}")


=== Experiment 0
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 1
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 2
Reject null hypothesis. Alternative is significantly worse!
More peaks detected: 0.0
=== Experiment 3
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 4
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 5
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 6
Reject null hypothesis
More peaks detected: 0.0
=== Experiment 7
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 8
Accept null hypothesis
More peaks detected: 0.0
=== Experiment 9
Accept null hypothesis
More peaks detected: 0.0
Total points: 0
More peaks detected: 0.0
