In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf

In [None]:
# Load the two cohort tables; paths are relative to the notebook's working directory.
# Later cells label `maxchair` as the <75 cohort and `maxwalk` as the ≥75 cohort.
maxwalk = pd.read_csv("data/maxwalk.csv")
maxchair = pd.read_csv("data/maxchair.csv")
# Quick sanity check: (rows, columns) of each table.
maxchair.shape, maxwalk.shape

In [None]:
# List the available columns in each table (later cells rely on 'disease_category',
# 'maxgrip', 'chair' / 'walking' and the regression covariates).
maxchair.columns, maxwalk.columns

#### Tukey HSD Post-hoc Test (Table or Heatmap)

In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

def run_tukey_test(df, outcome, alpha=0.05):
    """Print Tukey HSD pairwise comparisons of `outcome` between disease groups.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the `outcome` column and a 'disease_category' column.
    outcome : str
        Name of the numeric column to compare across groups.
    alpha : float, default 0.05
        Family-wise significance level passed to `pairwise_tukeyhsd`.

    Notes
    -----
    Rows with a missing outcome or a missing group label are dropped before
    the test. The arrays are handed to `pairwise_tukeyhsd` as-is, so a NaN
    outcome would otherwise propagate into the group statistics and a NaN
    group label would be treated as a group of its own (or fail to sort).
    """
    complete = df[[outcome, 'disease_category']].dropna()
    tukey = pairwise_tukeyhsd(
        endog=complete[outcome],
        groups=complete['disease_category'],
        alpha=alpha,
    )
    print(tukey.summary())


In [None]:
# MaxChair cohort: Tukey HSD across disease groups for grip strength ('maxgrip')
# and for the chair-stand measure ('chair').
run_tukey_test(maxchair, 'maxgrip')
run_tukey_test(maxchair, 'chair')

In [None]:
# MaxWalk cohort: Tukey HSD across disease groups for grip strength ('maxgrip')
# and for the walking measure ('walking').
run_tukey_test(maxwalk, 'maxgrip')
run_tukey_test(maxwalk, 'walking')

##### Sample distribution by disease group

In [None]:
def plot_combined_sample_distribution_percent(maxchair, maxwalk):
    """Draw a grouped bar chart of the share of participants per disease group.

    Percentages are computed separately within each cohort
    (`value_counts(normalize=True) * 100` on 'disease_category'), so the bars
    of one colour sum to 100 %.

    Parameters
    ----------
    maxchair : pandas.DataFrame
        Cohort shown as 'Age < 75'; needs a 'disease_category' column.
    maxwalk : pandas.DataFrame
        Cohort shown as 'Age ≥ 75'; needs a 'disease_category' column.

    Returns
    -------
    None
        The figure is shown with `plt.show()`.
    """
    cohorts = {'Age < 75': maxchair, 'Age ≥ 75': maxwalk}
    cohort_colors = {'Age < 75': 'lightblue', 'Age ≥ 75': 'lightpink'}

    # One long-format frame per cohort: group label, percentage, cohort label.
    parts = []
    for age_label, cohort in cohorts.items():
        pct = cohort['disease_category'].value_counts(normalize=True) * 100
        parts.append(pd.DataFrame({
            'Disease Group': pct.index.tolist(),
            'Percentage': list(pct.values),
            'Age Group': [age_label] * len(pct),
        }))
    combined_df = pd.concat(parts, ignore_index=True)

    custom_order = [
        'No disease', 'Only OA', 'Only HT', 'Only Diabetes',
        'OA and HT', 'OA and Diab', 'HT and Diab', 'All three diseases'
    ]
    # Labels missing from `custom_order` would become NaN in the Categorical
    # and silently disappear from the chart; keep them, after the known ones.
    unexpected = [
        group for group in dict.fromkeys(combined_df['Disease Group'])
        if group not in custom_order
    ]

    combined_df['Disease Group'] = pd.Categorical(
        combined_df['Disease Group'],
        categories=custom_order + unexpected,
        ordered=True
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(
        data=combined_df,
        x='Disease Group',
        y='Percentage',
        hue='Age Group',
        palette=cohort_colors,
        ax=ax
    )

    # Annotate every bar with its percentage, e.g. "12.3%".
    for container in ax.containers:
        ax.bar_label(container, fmt='%.1f%%', label_type='edge', padding=3, fontsize=9)

    ax.set_title('Percentage of participants by disease group in adults aged <75 and ≥75 years')
    ax.set_ylabel('Percentage (%)')
    ax.set_xlabel('Disease Group')
    plt.xticks(rotation=45)
    ax.legend(title='Age Group')
    fig.tight_layout()
    plt.show()


In [None]:
# Grouped bar chart of disease-group percentages for both cohorts.
plot_combined_sample_distribution_percent(maxchair, maxwalk)

##### Grip Strength (kg) 

In [None]:
# Colour per 'Outcome' label: the <75 series are dodgerblue and the ≥75 series
# are orangered, so both cohorts keep the same colour across box plots.
shared_palette = dict([
    ('Grip Strength (<75)', 'dodgerblue'),
    ('Grip Strength (≥75)', 'orangered'),
    ('Chair Stand (sec) for adults <75', 'dodgerblue'),
    ('Walking Speed (sec) for adults ≥75', 'orangered'),
])

In [None]:
def plot_combined_grip_strength(maxchair, maxwalk):
    """Box plot of 'maxgrip' per disease group, with one box colour per cohort.

    `maxchair` rows are labelled 'Grip Strength (<75)' and `maxwalk` rows
    'Grip Strength (≥75)'; colours come from the notebook-level
    `shared_palette`. The figure is shown and nothing is returned.
    """
    labelled_cohorts = (
        (maxchair, 'Grip Strength (<75)'),
        (maxwalk, 'Grip Strength (≥75)'),
    )
    # Stack both cohorts in long format, tagging each row with its cohort label.
    stacked = pd.concat(
        [
            cohort[['disease_category', 'maxgrip']].assign(Outcome=label)
            for cohort, label in labelled_cohorts
        ],
        ignore_index=True,
    ).rename(columns={'maxgrip': 'Grip Strength (kg)'})

    plt.figure(figsize=(13, 6))
    ax = sns.boxplot(
        data=stacked,
        x='disease_category',
        y='Grip Strength (kg)',
        hue='Outcome',
        palette=shared_palette,
    )
    ax.set_title('Grip Strength (kg) by Disease Group')
    plt.xticks(rotation=45)
    ax.set_xlabel('Disease Group')
    ax.set_ylabel('Grip Strength (kg)')
    ax.legend(title='Outcome')
    ax.grid(False)
    plt.tight_layout()
    plt.show()

In [None]:
# Box plots of grip strength by disease group for both cohorts.
plot_combined_grip_strength(maxchair, maxwalk)

##### Chair Stand & Walking Speed (sec) 

In [None]:

def plot_combined_mobility(maxchair, maxwalk):
    """Box plot of the mobility measures per disease group.

    The 'chair' column of `maxchair` and the 'walking' column of `maxwalk`
    are stacked into one 'Value' column and distinguished by an 'Outcome'
    label. Box colours come from the notebook-level `shared_palette`, whose
    keys match these labels, so each cohort has the same colour as in the
    grip-strength figure.

    Parameters
    ----------
    maxchair : pandas.DataFrame
        Needs 'disease_category' and 'chair' columns.
    maxwalk : pandas.DataFrame
        Needs 'disease_category' and 'walking' columns.

    Returns
    -------
    None
        The figure is shown with `plt.show()`.
    """
    chair_df = maxchair[['disease_category', 'chair']].copy()
    chair_df['Outcome'] = 'Chair Stand (sec) for adults <75'
    chair_df = chair_df.rename(columns={'chair': 'Value'})

    walk_df = maxwalk[['disease_category', 'walking']].copy()
    walk_df['Outcome'] = 'Walking Speed (sec) for adults ≥75'
    walk_df = walk_df.rename(columns={'walking': 'Value'})

    combined_mobility = pd.concat([chair_df, walk_df], ignore_index=True)

    fig, ax = plt.subplots(figsize=(13, 6))
    sns.boxplot(
        data=combined_mobility,
        x='disease_category',
        y='Value',
        hue='Outcome',
        palette=shared_palette,  # previously omitted, so seaborn's default colours were used
        ax=ax,
    )
    ax.set_title('Chair Stand and Walking Speed by Disease Group')
    plt.xticks(rotation=45)
    ax.set_xlabel('Disease Group')
    ax.set_ylabel('Time (seconds)')
    ax.legend(title='Outcome')
    ax.grid(False)
    fig.tight_layout()
    plt.show()

In [None]:
# Box plots of chair-stand and walking measures by disease group.
plot_combined_mobility(maxchair, maxwalk)

##### Regression of maxgrip in adults under 75 and aged 75 or over

In [None]:


def plot_grip_strength_regression(maxchair, maxwalk):
    """Fit adjusted OLS models for grip strength and plot the disease-group effects.

    For each cohort the model
    ``maxgrip ~ C(disease_category) + age + bmi + female + educational_level``
    is fitted. Only the ``C(disease_category)[T.<group>]`` coefficients are
    kept and drawn as a point with a horizontal confidence-interval bar
    (bounds from `conf_int()` at its default level).

    Parameters
    ----------
    maxchair : pandas.DataFrame
        Cohort labelled 'Grip Strength (<75)'.
    maxwalk : pandas.DataFrame
        Cohort labelled 'Grip Strength (≥75)'.

    Returns
    -------
    pandas.DataFrame
        Columns 'predictor' (disease-group name), 'coef', 'ci_lower',
        'ci_upper' and 'Outcome'; indexed by the original model term names.
    """
    formula = 'maxgrip ~ C(disease_category) + age + bmi + female + educational_level'
    cohorts = {
        'Grip Strength (<75)': maxchair,
        'Grip Strength (≥75)': maxwalk,
    }
    palette = {
        'Grip Strength (<75)': '#1f77b4',
        'Grip Strength (≥75)': '#d62728'
    }
    term_prefix = 'C(disease_category)[T.'
    dodge = 0.5  # total vertical separation between the two cohorts' markers

    frames = []
    for outcome, cohort in cohorts.items():
        model = smf.ols(formula, data=cohort).fit()
        ci = model.conf_int()
        frames.append(pd.DataFrame({
            'predictor': model.params.index,
            'coef': model.params.values,
            'ci_lower': ci.iloc[:, 0],
            'ci_upper': ci.iloc[:, 1],
            'Outcome': outcome
        }))

    all_df = pd.concat(frames)
    # Keep the disease-group terms by prefix rather than excluding covariates
    # by name, so dummy-coded covariates (e.g. 'female[T.True]') cannot slip
    # through. `.copy()` avoids assigning into a filtered view.
    all_df = all_df[all_df['predictor'].str.startswith(term_prefix)].copy()
    all_df['predictor'] = all_df['predictor'].str[len(term_prefix):].str.rstrip(']')

    # Row position of each disease group, in order of first appearance.
    predictors = list(dict.fromkeys(all_df['predictor']))
    row_of = {name: pos for pos, name in enumerate(predictors)}
    n_outcomes = len(cohorts)

    fig, ax = plt.subplots(figsize=(10, 6))
    for rank, outcome in enumerate(cohorts):
        subset = all_df[all_df['Outcome'] == outcome]
        # The marker and its CI bar share one offset so they always line up.
        offset = (rank / (n_outcomes - 1) - 0.5) * dodge if n_outcomes > 1 else 0.0
        y = subset['predictor'].map(row_of).to_numpy(dtype=float) + offset
        ax.hlines(
            y, subset['ci_lower'].to_numpy(), subset['ci_upper'].to_numpy(),
            colors=palette[outcome], linewidth=2, alpha=0.7
        )
        ax.plot(
            subset['coef'].to_numpy(), y, 'o',
            color=palette[outcome], label=outcome
        )

    ax.set_yticks(list(range(len(predictors))))
    ax.set_yticklabels(predictors)
    ax.invert_yaxis()  # first group at the top, as in seaborn's horizontal categorical plots

    ax.axvline(0, color='red', linestyle='--')
    ax.set_title("Adjusted Regression Coefficients for Grip Strength by Disease Group")
    ax.set_xlabel("Coefficient Estimate")
    ax.set_ylabel("Disease Groups")
    ax.legend(title="Outcome")
    fig.tight_layout()
    plt.show()

    return all_df



In [None]:
# Fit both grip-strength models, draw the coefficient plot, and show the
# coefficient table as the cell's rich output instead of printing it as text.
df = plot_grip_strength_regression(maxchair, maxwalk)
df


##### Regression of chair stand (maxchair) and walking speed (maxwalk) — Fig. 6 in the paper

In [None]:
def plot_mobility_regression_coeffs_fixed(maxchair, maxwalk):
    """Fit adjusted OLS models for the mobility outcomes and plot disease-group effects.

    Two models are fitted with the same covariates
    (``C(disease_category) + age + bmi + female + educational_level``):
    ``chair`` on `maxchair` and ``walking`` on `maxwalk`. Only the
    ``C(disease_category)[T.<group>]`` coefficients are kept and drawn as a
    point with a horizontal confidence-interval bar (bounds from
    `conf_int()` at its default level).

    Parameters
    ----------
    maxchair : pandas.DataFrame
        Cohort labelled 'Chair Stand (<75)'.
    maxwalk : pandas.DataFrame
        Cohort labelled 'Walking Speed (≥75)'.

    Returns
    -------
    pandas.DataFrame
        Columns 'predictor' (disease-group name), 'coef', 'ci_lower',
        'ci_upper' and 'Outcome'; indexed by the original model term names.
    """
    # (outcome label, model formula, data) for each cohort.
    specs = [
        ('Chair Stand (<75)',
         'chair ~ C(disease_category) + age + bmi + female + educational_level',
         maxchair),
        ('Walking Speed (≥75)',
         'walking ~ C(disease_category) + age + bmi + female + educational_level',
         maxwalk),
    ]
    palette = {
        'Chair Stand (<75)': '#1f77b4',
        'Walking Speed (≥75)': '#d62728',
    }
    term_prefix = 'C(disease_category)[T.'
    dodge = 0.5  # total vertical separation between the two outcomes' markers

    frames = []
    for outcome, formula, cohort in specs:
        model = smf.ols(formula, data=cohort).fit()
        ci = model.conf_int()
        frames.append(pd.DataFrame({
            'predictor': model.params.index,
            'coef': model.params.values,
            'ci_lower': ci.iloc[:, 0],
            'ci_upper': ci.iloc[:, 1],
            'Outcome': outcome
        }))

    results = pd.concat(frames)
    # Keep the disease-group terms by prefix rather than excluding covariates
    # by name, so dummy-coded covariates (e.g. 'female[T.True]') cannot slip
    # through. `.copy()` avoids assigning into a filtered view.
    results = results[results['predictor'].str.startswith(term_prefix)].copy()
    results['predictor'] = results['predictor'].str[len(term_prefix):].str.rstrip(']')

    # Row position of each disease group, in order of first appearance.
    predictors = list(dict.fromkeys(results['predictor']))
    row_of = {name: pos for pos, name in enumerate(predictors)}
    n_outcomes = len(specs)

    fig, ax = plt.subplots(figsize=(10, 6))
    for rank, (outcome, _, _) in enumerate(specs):
        subset = results[results['Outcome'] == outcome]
        # The marker and its CI bar share one offset so they always line up.
        offset = (rank / (n_outcomes - 1) - 0.5) * dodge if n_outcomes > 1 else 0.0
        y = subset['predictor'].map(row_of).to_numpy(dtype=float) + offset
        ax.hlines(
            y, subset['ci_lower'].to_numpy(), subset['ci_upper'].to_numpy(),
            colors=palette[outcome], linewidth=1.5, alpha=0.8
        )
        ax.plot(
            subset['coef'].to_numpy(), y, 'o',
            color=palette[outcome], label=outcome
        )

    ax.set_yticks(list(range(len(predictors))))
    ax.set_yticklabels(predictors)
    ax.invert_yaxis()  # first group at the top, as in seaborn's horizontal categorical plots

    ax.axvline(0, color='black', linestyle='--')
    ax.set_title("Adjusted Regression Coefficients for Mobility Outcomes by Disease Group")
    ax.set_xlabel("Coefficient Estimate")
    ax.set_ylabel("Disease Groups")
    ax.legend(title="Outcome")
    fig.tight_layout()
    plt.show()

    return results


In [None]:
# Fit both mobility models, draw the coefficient plot, and show the
# coefficient table as the cell's rich output instead of printing it as text.
df_1 = plot_mobility_regression_coeffs_fixed(maxchair, maxwalk)
df_1