In [1]:
import pandas as pd
import numpy as np
import pickle

import statsmodels.api as sm
from statsmodels.formula.api import ols

from statsmodels.stats.multicomp import pairwise_tukeyhsd


# Perform statistical analysis on dataset.

In [2]:
with open('../data/cleaned_dataframe.pickle','rb') as read_file:
    df = pickle.load(read_file)

### Treadmill test analysis.

In [6]:
# Performing two-way ANOVA on TREADMILL tests to examine age and sex impact on RPE.

model_TM = ols('peak_rpe ~ C(age_group) + C(Gender) + C(age_group):C(Gender)',
            data=df[df.Mode=="TM"]).fit()
result_TM = sm.stats.anova_lm(model_TM, type=2)
  
# Print the result
print(result_TM)

                             df        sum_sq    mean_sq          F  \
C(age_group)                6.0    147.897280  24.649547  13.595357   
C(Gender)                   1.0      3.407310   3.407310   1.879288   
C(age_group):C(Gender)      6.0     20.717468   3.452911   1.904439   
Residual                11123.0  20166.951993   1.813086        NaN   

                              PR(>F)  
C(age_group)            1.934796e-15  
C(Gender)               1.704428e-01  
C(age_group):C(Gender)  7.615521e-02  
Residual                         NaN  


In [4]:
# Tukey post-hoc analysis for pair-wise comparisons.
# (only looking at within age based on results above).

tukey = pairwise_tukeyhsd(endog=df[df.Mode=="TM"]['peak_rpe'],
                          groups=df[df.Mode=="TM"]['age_group'],
                          alpha=0.05)
# print(tukey)

### Cycling test analysis.


In [5]:
# Performing two-way ANOVA on CYCLING tests to examine age and sex impact on RPE.

model_CY = ols('peak_rpe ~ C(age_group) + C(Gender) + C(age_group):C(Gender)',
            data=df[df.Mode=="CY"]).fit()
result_CY = sm.stats.anova_lm(model_CY, type=2)
  
# Print the result
# print(result_CY)

                           df       sum_sq   mean_sq         F    PR(>F)
C(age_group)              6.0    45.651408  7.608568  4.899036  0.000061
C(Gender)                 1.0     3.782731  3.782731  2.435640  0.118954
C(age_group):C(Gender)    6.0     2.613083  0.435514  0.280420  0.946299
Residual                909.0  1411.744760  1.553075       NaN       NaN


In [6]:
# Tukey post-hoc analysis for pair-wise comparisons.
# (only looking at within age based on results above).

tukey = pairwise_tukeyhsd(endog=df[df.Mode=="CY"]['peak_rpe'],
                          groups=df[df.Mode=="CY"]['age_group'],
                          alpha=0.05)
# print(tukey)

In [4]:
# Performing three-way ANOVA (same as above but including test mode).

model = ols('peak_rpe ~ C(age_group) + C(Gender) + C(Mode)',
            data=df).fit()
result = sm.stats.anova_lm(model, type=2)
  
# Print the result
# print(result)