In [67]:
import json

import plotly.graph_objects as go
import statsmodels
import statsmodels.api
from scipy import stats

In [68]:
# Load the exertion-score samples from the JSON file next to the notebook.
# The encoding is pinned to UTF-8 (the standard JSON interchange encoding) so
# the read does not depend on the platform's default locale encoding.
with open("./sample_data.json", "r", encoding="utf-8") as f:
    sample_data = json.load(f)

In [69]:
# Temperature experiment samples: "hot" is temp >= 27C, "regular" is temp < 27C.
hot_exertion_scores, regular_temp_exertion_scores = (
    sample_data["hot_exertion_scores"],
    sample_data["regular_temp_exertion_scores"],
)

# Humidity experiment samples: "humid" is humidity >= 60%, "regular" is humidity < 60%.
humid_exertion_scores, regular_humidity_exertion_scores = (
    sample_data["humid_exertion_scores"],
    sample_data["regular_humidity_exertion_scores"],
)

# Significance Test Selection

In [70]:
# Overlay the two temperature groups as semi-transparent histograms so their
# shapes can be compared on one set of axes.
fig = go.Figure()

for _scores, _label in (
    (regular_temp_exertion_scores, "Regular (< 27C)"),
    (hot_exertion_scores, "Hot (>= 27C)"),
):
    fig.add_trace(go.Histogram(x=_scores, opacity=0.50, name=_label))

fig.update_layout(
    barmode="overlay",
    title_text="Exertion Scores by Perceived Temperature",
)
fig.update_xaxes(title_text="Exertion Score")
fig.update_yaxes(title_text="Frequency")

fig.show()

It appears that the exertion scores of both temperature groups roughly follow a normal distribution.

In [71]:
# Overlay the two humidity groups as semi-transparent histograms; the traces
# are handed to the figure up front, in the same order they were added before.
fig = go.Figure(
    data=[
        go.Histogram(
            x=regular_humidity_exertion_scores,
            opacity=0.75,
            name="Regular (< 60%)",
        ),
        go.Histogram(
            x=humid_exertion_scores,
            opacity=0.40,
            name="Humid (>= 60%)",
        ),
    ]
)

fig.update_layout(
    barmode="overlay",
    title_text="Exertion Scores by Perceived Humidity",
)
fig.update_xaxes(title_text="Exertion Score")
fig.update_yaxes(title_text="Count")

fig.show()

It appears that the exertion scores of both humidity groups follow a normal distribution.

## Verifying the Normality of Every Sample Distribution

The test being used is the Shapiro-Wilk test. For more information on verifying normality, [click here](https://towardsdatascience.com/6-ways-to-test-for-a-normal-distribution-which-one-to-use-9dcf47d8fa93).

In [72]:
# Shapiro-Wilk p-value for each of the four samples, rounded to 4 decimals
# for display.
regular_temp_p_value = round(stats.shapiro(regular_temp_exertion_scores).pvalue, 4)
hot_p_value = round(stats.shapiro(hot_exertion_scores).pvalue, 4)
regular_humidity_p_value = round(
    stats.shapiro(regular_humidity_exertion_scores).pvalue, 4
)
humid_p_value = round(stats.shapiro(humid_exertion_scores).pvalue, 4)

def display_normality_test(p_value, sample_name, alpha=0.05):
    """Print the verdict of a normality test for one sample.

    A p-value at or above ``alpha`` means normality is not rejected, so the
    sample is treated as normally distributed; a p-value below ``alpha``
    means the sample is reported as not normal.

    Args:
        p_value: P-value returned by the normality test.
        sample_name: Human-readable name of the sample, used in the message.
        alpha: Significance level the p-value is compared against.

    Returns:
        None. The verdict is written to standard output.
    """
    # The comparison sign in the message must agree with the branch taken:
    # ">=" goes with the "normal" verdict, "<" with the "not normal" verdict.
    if p_value >= alpha:
        verdict = (
            f">= {alpha} therefore assume the {sample_name} "
            "sample distribution is normal."
        )
    else:
        verdict = (
            f"< {alpha} therefore the {sample_name} "
            "sample distribution is not normal."
        )
    print("P-Value =", p_value, verdict)

# Report the normality verdict for each sample, in the same order as before.
for _sample_p_value, _sample_label in (
    (regular_temp_p_value, "Regular Temp Exertion Score"),
    (hot_p_value, "Hot Temp Exertion Score"),
    (regular_humidity_p_value, "Regular Humidity Exertion Score"),
    (humid_p_value, "Humid Humidity Exertion Score"),
):
    display_normality_test(_sample_p_value, _sample_label)

P-Value = 0.1306 < 0.05 therefore assume the Regular Temp Exertion Score sample distribution is normal.
P-Value = 0.4763 < 0.05 therefore assume the Hot Temp Exertion Score sample distribution is normal.
P-Value = 0.1492 < 0.05 therefore assume the Regular Humidity Exertion Score sample distribution is normal.
P-Value = 0.2161 < 0.05 therefore assume the Humid Humidity Exertion Score sample distribution is normal.


# Temperature Experiment

1. Null Hypothesis ($H_0$): the mean exertion score on regular days is less than or equal to the mean exertion score on hot days: $\overline{\textnormal{exertion score}_{regular}} \leq \overline{\textnormal{exertion score}_{hot}}$
2. Alternative Hypothesis ($H_A$): $\overline{\textnormal{exertion score}_{regular}} \gt \overline{\textnormal{exertion score}_{hot}}$

In [73]:
# One-sided, unequal-variance two-sample t-test.
# alternative="larger" tests H_A: mean(regular temp) > mean(hot).
# The third returned value is discarded.
t_statistic, p_value, _ = statsmodels.stats.weightstats.ttest_ind(
    regular_temp_exertion_scores,
    hot_exertion_scores,
    usevar="unequal",
    alternative="larger",
)

# Single print call; the trailing "\n" keeps the blank line after the P-Value.
print(
    f"Temperature Test Statistic: {round(t_statistic, 4)}\n"
    f"Temperature P-Value: {round(p_value, 4)}\n"
)

Temperature Test Statistic: -3.0466
Temperature P-Value: 0.998



## Temperature Experiment Result

The P-Value = 0.9980 > 0.05, therefore we cannot reject the Null Hypothesis. This means that we do not have significant evidence that the mean exertion score of workouts on regular days is greater than the mean exertion score of workouts on hot days.

# Humidity Experiment

1. Null Hypothesis ($H_0$): the mean exertion score on regular days is less than or equal to the mean exertion score on humid days: $\overline{\textnormal{exertion score}_{regular}} \leq \overline{\textnormal{exertion score}_{humid}}$
2. Alternative Hypothesis ($H_A$): $\overline{\textnormal{exertion score}_{regular}} \gt \overline{\textnormal{exertion score}_{humid}}$

In [74]:
# One-sided two-sample z-test for the humidity experiment.
# alternative="larger" tests H_A: mean(regular humidity) > mean(humid);
# usevar="unequal" avoids assuming the two groups share a variance.
# The printed labels say "Humidity" (they previously said "Temperature") and
# the header states the alternative hypothesis that is actually tested.
print("Humidity Z-Test (H_A: regular humidity exertion > humid exertion)")
# NOTE: `t_statistic` holds the z-statistic here; the name is kept so that
# any other cell referring to it still resolves.
t_statistic, p_value = statsmodels.stats.weightstats.ztest(
    regular_humidity_exertion_scores,
    humid_exertion_scores,
    alternative="larger",
    usevar="unequal",
)
print(f"Humidity Test Statistic: {round(t_statistic, 4)}")
print(f"Humidity P-Value: {round(p_value, 4)}\n")

Temperature Z-Test (regular humidity exertion < humid exertion)
Temperature Test Statistic: -1.297
Temperature P-Value: 0.9027



## Humidity Experiment Result

The P-Value = 0.9027 > 0.05, therefore we cannot reject the Null Hypothesis. This means that we do not have significant evidence that the mean exertion score of workouts on regular days is greater than the mean exertion score of workouts on humid days.