In [1]:
import pandas as pd
import numpy as np
import pickle

import statsmodels.api as sm
from statsmodels.formula.api import ols

from statsmodels.stats.multicomp import pairwise_tukeyhsd


# Perform statistical analysis on dataset.

In [2]:
# Load the pickled object stored at ../data/cleaned_dataframe.pickle into `df`;
# the cells below use it as a pandas DataFrame (boolean filtering on `Mode`,
# column access for `peak_rpe` and `age_group`).
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source — presumably an earlier cleaning
# step of this project; confirm.
with open('../data/cleaned_dataframe.pickle','rb') as read_file:
    df = pickle.load(read_file)

### Treadmill test analysis.

In [3]:
# Two-way ANOVA on TREADMILL tests (Mode == "TM") to examine the impact of
# age group, sex, and their interaction on peak RPE.

model_TM = ols('peak_rpe ~ C(age_group) + C(Gender) + C(age_group):C(Gender)',
               data=df[df.Mode == "TM"]).fit()

# The keyword is `typ`, not `type`: anova_lm ignores unknown keyword
# arguments, so `type=2` silently fell back to the default Type I table.
result_TM = sm.stats.anova_lm(model_TM, typ=2)

# Print the result
# print(result_TM)

In [4]:
# Tukey HSD post-hoc analysis for pair-wise comparisons on TREADMILL tests
# (only looking at within age based on results above).

# Build the treadmill mask once so both series are selected from the same rows.
treadmill_rows = df.Mode == "TM"
tukey_TM = pairwise_tukeyhsd(endog=df.loc[treadmill_rows, 'peak_rpe'],
                             groups=df.loc[treadmill_rows, 'age_group'],
                             alpha=0.05)

# Backward-compatible alias: the result used to be bound only to `tukey`,
# a name the cycling analysis rebinds. Prefer `tukey_TM`, which matches the
# `model_TM` / `result_TM` naming and is not overwritten.
tukey = tukey_TM
# print(tukey_TM)

### Cycling test analysis.


In [5]:
# Two-way ANOVA on CYCLING tests (Mode == "CY") to examine the impact of
# age group, sex, and their interaction on peak RPE.

model_CY = ols('peak_rpe ~ C(age_group) + C(Gender) + C(age_group):C(Gender)',
               data=df[df.Mode == "CY"]).fit()

# The keyword is `typ`, not `type`: anova_lm ignores unknown keyword
# arguments, so `type=2` silently fell back to the default Type I table.
result_CY = sm.stats.anova_lm(model_CY, typ=2)

# Print the result
# print(result_CY)

In [6]:
# Tukey HSD post-hoc analysis for pair-wise comparisons on CYCLING tests
# (only looking at within age based on results above).

# Build the cycling mask once so both series are selected from the same rows.
cycling_rows = df.Mode == "CY"
tukey_CY = pairwise_tukeyhsd(endog=df.loc[cycling_rows, 'peak_rpe'],
                             groups=df.loc[cycling_rows, 'age_group'],
                             alpha=0.05)

# Backward-compatible alias: the result used to be bound only to `tukey`.
# Prefer `tukey_CY`, which matches the `model_CY` / `result_CY` naming and
# cannot be confused with the Tukey result of another test mode.
tukey = tukey_CY
# print(tukey_CY)

In [7]:
# Three-way ANOVA on all tests: age group, sex, and test mode as factors
# for peak RPE.
# NOTE(review): this formula contains main effects only (no interaction
# terms), unlike the two-way models fitted per test mode — confirm intended.

model = ols('peak_rpe ~ C(age_group) + C(Gender) + C(Mode)',
            data=df).fit()

# The keyword is `typ`, not `type`: anova_lm ignores unknown keyword
# arguments, so `type=2` silently fell back to the default Type I table.
result = sm.stats.anova_lm(model, typ=2)

# Print the result
# print(result)