In [68]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

import matplotlib.pyplot as plt
import seaborn as sns


#### Physical activity

In [69]:
# Load the two physical-activity cohorts. The balanced file carries an extra
# 'Treatment' column that is removed right after reading.
dfPA_treatment = pd.read_csv("testdata1/2.PA_treatment.csv")
dfPA_balanced = pd.read_csv("testdata1/3.PA_balanced.csv")
dfPA_balanced = dfPA_balanced.drop(columns='Treatment')

# Report (rows, columns) for each cohort as a quick sanity check.
for shape_label, cohort_frame in (
    ("Physical activity Treatment group:", dfPA_treatment),
    ("Physical activity Balanced group: ", dfPA_balanced),
):
    print(shape_label, cohort_frame.shape)

Physical activity Treatment group: (7603, 9)
Physical activity Balanced group:  (1156, 9)


In [70]:
# Summary statistics of Age for each cohort, balanced group first.
for heading, cohort_frame in (("Balanced:", dfPA_balanced), ("Treatment:", dfPA_treatment)):
    print(heading)
    print(cohort_frame.Age.describe())

Balanced:
count    1156.000000
mean       56.562284
std         7.614893
min        45.000000
25%        51.000000
50%        55.500000
75%        61.000000
max        79.000000
Name: Age, dtype: float64
Treatment:
count    7603.000000
mean       56.692227
std         6.818423
min        23.000000
25%        53.000000
50%        57.000000
75%        61.000000
max        85.000000
Name: Age, dtype: float64


##### Create a categorical outcome column:

In [71]:
# Map a numeric physical-activity change score to a direction label
def categorize_change(PA_change):
    """Label a physical-activity change score by its sign.

    Parameters
    ----------
    PA_change : scalar
        Numeric change score for one participant.

    Returns
    -------
    str or float
        'Increased' for a positive value, 'Decreased' for a negative value,
        'Unchanged' for exactly zero, and NaN when the input is missing.
    """
    # NaN compares False against everything, so without this guard a missing
    # score would fall through both comparisons and be counted as 'Unchanged'.
    if pd.isna(PA_change):
        return np.nan
    if PA_change > 0:
        return 'Increased'
    if PA_change < 0:
        return 'Decreased'
    return 'Unchanged'

# Derive the categorical outcome column on both cohorts from the numeric
# PA_change score (the frames are modified in place).
for cohort_frame in (dfPA_treatment, dfPA_balanced):
    cohort_frame['PA_change_category'] = cohort_frame['PA_change'].apply(categorize_change)

##### Calculate Chi2:

In [72]:
# Tag each row with its cohort so the two frames can be stacked into one table.
dfPA_treatment['Group'] = 'Treatment'
dfPA_balanced['Group'] = 'Balanced'

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Both source
# frames come from read_csv with default row labels, so without it the labels
# repeat and label-based selection / index alignment on df_combined becomes
# ambiguous.
df_combined = pd.concat([dfPA_treatment, dfPA_balanced], ignore_index=True)

# Observed counts: rows = PA_change_category, columns = Group.
contingency_table = pd.crosstab(df_combined['PA_change_category'], df_combined['Group'])

# Chi-squared test of independence between change category and group.
# chi2_contingency returns (statistic, p-value, degrees of freedom, expected counts).
chi2, p, dof, ex = chi2_contingency(contingency_table)

print("Chi-squared Test for PA_change_category across Treatment and Balanced groups")
print(f"Chi2: {chi2}, p-value: {p}, Degrees of Freedom: {dof}")
print("\nExpected Frequencies:")
print(ex)
print("\nObserved Frequencies:")
print(contingency_table)


Chi-squared Test for PA_change_category across Treatment and Balanced groups
Chi2: 66.93576893573248, p-value: 2.917980965784232e-15, Degrees of Freedom: 2

Expected Frequencies:
[[ 217.63260646 1431.36739354]
 [ 303.55063363 1996.44936637]
 [ 634.8167599  4175.1832401 ]]

Observed Frequencies:
Group               Balanced  Treatment
PA_change_category                     
Decreased                295       1354
Increased                208       2092
Unchanged                653       4157


#### Quality of life

In [73]:
# Load the three quality-of-life tables (treatment, control, balanced) in one pass.
dfQ_treatment, dfQ_control, dfQ_balanced = (
    pd.read_csv(csv_path)
    for csv_path in (
        "testdata1/2.QOL_treatment.csv",
        "testdata1/2.QOL_control.csv",
        "testdata1/3.QOL_balanced.csv",
    )
)

In [74]:
# (path, label) pairs for the three quality-of-life CSV files.
# NOTE(review): these are the same paths already loaded into dfQ_* in the
# preceding cell, so every file is read from disk twice.
files = [
    ("testdata1/2.QOL_treatment.csv", "dfQ_treatment"),
    ("testdata1/2.QOL_control.csv", "dfQ_control"),
    ("testdata1/3.QOL_balanced.csv", "dfQ_balanced"),
]

# Read each file into a dict keyed by its label.
dataframes = {name: pd.read_csv(file) for file, name in files}

# Report each table's (rows, columns).
for name, frame in dataframes.items():
    print(f"{name}: {frame.shape}")

dfQ_treatment: (7603, 9)
dfQ_control: (1156, 9)
dfQ_balanced: (1156, 10)


In [75]:
# Map a numeric quality-of-life change score to a 10-point-threshold label
def QOL_categorize_significant_change(QOL_change):
    """Label a quality-of-life change score using a +/-10 point threshold.

    Parameters
    ----------
    QOL_change : scalar
        Numeric change score for one participant.

    Returns
    -------
    str or float
        'Increased 10+ points' when the value is >= 10,
        'Decreased 10+ points' when it is <= -10,
        'Maintained' for anything strictly between -10 and 10 (zero included),
        and NaN when the input is missing.
    """
    # NaN fails both threshold comparisons, so without this guard a missing
    # score would be counted as 'Maintained'.
    if pd.isna(QOL_change):
        return np.nan
    if QOL_change >= 10:
        return 'Increased 10+ points'
    if QOL_change <= -10:
        return 'Decreased 10+ points'
    return 'Maintained'

# Add the thresholded QOL outcome label to all three cohorts (in place).
for cohort_frame in (dfQ_treatment, dfQ_control, dfQ_balanced):
    cohort_frame['QOL_change_category'] = cohort_frame['QOL_change'].apply(QOL_categorize_significant_change)

In [76]:
# Inspect the balanced QOL cohort now that QOL_change_category has been added.
dfQ_balanced

Unnamed: 0,Age,Gender,BMI,Depression,Employment_status,Baseline_Physical_Activity,Baseline_Pain,Baseline_Quality_of_life,QOL_change,Treatment,QOL_change_category
0,68.0,1.0,31.6,1.0,1.0,1,30.0,56.25,25.00,0,Increased 10+ points
1,59.0,2.0,26.8,1.0,1.0,2,70.0,37.50,12.50,0,Increased 10+ points
2,59.0,2.0,26.8,1.0,1.0,2,70.0,37.50,12.50,0,Increased 10+ points
3,59.0,2.0,26.8,1.0,1.0,2,70.0,37.50,12.50,0,Increased 10+ points
4,46.0,1.0,30.0,1.0,1.0,1,20.0,68.75,12.50,0,Increased 10+ points
...,...,...,...,...,...,...,...,...,...,...,...
1151,61.0,2.0,33.2,1.0,1.0,3,30.0,37.50,-12.50,0,Decreased 10+ points
1152,61.0,2.0,33.2,1.0,1.0,3,30.0,37.50,-12.50,0,Decreased 10+ points
1153,61.0,2.0,33.2,1.0,1.0,3,30.0,37.50,-12.50,0,Decreased 10+ points
1154,59.0,1.0,34.1,1.0,1.0,3,90.0,37.50,18.75,0,Increased 10+ points


In [77]:
# Tag each row with its cohort so the two frames can be stacked into one table.
dfQ_treatment['Group'] = 'Treatment'
dfQ_balanced['Group'] = 'Balanced'

# Stack the cohorts. ignore_index=True gives the result a fresh 0..n-1 index;
# both inputs come from read_csv with default row labels, so without it the
# labels repeat and label-based selection / alignment on dfQ_combined becomes
# ambiguous.
# NOTE(review): dfQ_balanced has one more column than dfQ_treatment (presumably
# 'Treatment' - confirm); that column is NaN for the Treatment rows here.
dfQ_combined = pd.concat([dfQ_treatment, dfQ_balanced], ignore_index=True)

# Observed counts: rows = QOL_change_category, columns = Group.
contingency_table = pd.crosstab(dfQ_combined['QOL_change_category'], dfQ_combined['Group'])

# Chi-squared test of independence between change category and group.
# chi2_contingency returns (statistic, p-value, degrees of freedom, expected counts).
# NOTE(review): the dfQ_balanced preview shows runs of identical rows; if the
# balanced set was built by sampling with replacement, the test's assumption of
# independent observations does not hold - confirm.
chi2, p, dof, ex = chi2_contingency(contingency_table)

# Display results
print("Chi-squared Test for QOL_change_category across Treatment and Balanced groups")
print(f"Chi2: {chi2}, p-value: {p}, Degrees of Freedom: {dof}")
print("\nExpected Frequencies:")
print(ex)
print("\nObserved Frequencies:")
print(contingency_table)


Chi-squared Test for QOL_change_category across Treatment and Balanced groups
Chi2: 35.349359223014204, p-value: 2.108551317805186e-08, Degrees of Freedom: 2

Expected Frequencies:
[[ 141.08505537  927.91494463]
 [ 546.39114054 3593.60885946]
 [ 468.52380409 3081.47619591]]

Observed Frequencies:
Group                 Balanced  Treatment
QOL_change_category                      
Decreased 10+ points       156        913
Increased 10+ points       453       3687
Maintained                 547       3003


#### Pain

In [78]:
# Load the three pain tables (treatment, control, balanced) in one pass.
dfpain_treatment, dfpain_control, dfpain_balanced = (
    pd.read_csv(csv_path)
    for csv_path in (
        "testdata1/2.Pain_treatment.csv",
        "testdata1/2.Pain_control.csv",
        "testdata1/3.Pain_balanced.csv",
    )
)

In [79]:
# (path, label) pairs for the three pain CSV files.
# NOTE(review): these are the same paths already loaded into dfpain_* in the
# preceding cell, so every file is read from disk twice.
files = [
    ("testdata1/2.Pain_treatment.csv", "dfpain_treatment"),
    ("testdata1/2.Pain_control.csv", "dfpain_control"),
    ("testdata1/3.Pain_balanced.csv", "dfpain_balanced"),
]

# Read each file into a dict keyed by its label.
dataframes = {name: pd.read_csv(file) for file, name in files}

# Report each table's (rows, columns).
for name, frame in dataframes.items():
    print(f"{name}: {frame.shape}")


dfpain_treatment: (7603, 9)
dfpain_control: (1156, 9)
dfpain_balanced: (1156, 10)


In [80]:
# Map a numeric pain change score to a 20-point-threshold label
def pain_categorize_significant_change(pain_change):
    """Label a pain change score using a +/-20 point threshold.

    Parameters
    ----------
    pain_change : scalar
        Numeric change score for one participant.

    Returns
    -------
    str or float
        'Decreased 20+ points' when the value is <= -20,
        'Increased 20+ points' when it is >= 20,
        'Maintained' for anything strictly between -20 and 20 (this covers
        both no change and changes smaller than 20 points),
        and NaN when the input is missing.
    """
    # NaN fails both threshold comparisons, so without this guard a missing
    # score would be counted as 'Maintained'.
    if pd.isna(pain_change):
        return np.nan
    if pain_change <= -20:
        return 'Decreased 20+ points'
    if pain_change >= 20:
        return 'Increased 20+ points'
    return 'Maintained'

# Add the thresholded pain outcome label to all three cohorts (in place).
for cohort_frame in (dfpain_treatment, dfpain_control, dfpain_balanced):
    cohort_frame['pain_change_category'] = cohort_frame['pain_change'].apply(pain_categorize_significant_change)


In [81]:
# Tag each row with its cohort so the two frames can be stacked into one table.
dfpain_treatment['Group'] = 'Treatment'
dfpain_balanced['Group'] = 'Balanced'

# Stack the cohorts. ignore_index=True gives the result a fresh 0..n-1 index;
# both inputs come from read_csv with default row labels, so without it the
# labels repeat and label-based selection / alignment on dfpain_combined
# becomes ambiguous.
# NOTE(review): dfpain_balanced has one more column than dfpain_treatment
# (10 vs 9 in the printed shapes); that extra column is NaN for the Treatment
# rows here.
dfpain_combined = pd.concat([dfpain_treatment, dfpain_balanced], ignore_index=True)

# Observed counts: rows = pain_change_category, columns = Group.
contingency_table = pd.crosstab(dfpain_combined['pain_change_category'], dfpain_combined['Group'])

# Chi-squared test of independence between change category and group.
# chi2_contingency returns (statistic, p-value, degrees of freedom, expected counts).
chi2, p, dof, ex = chi2_contingency(contingency_table)

# Display results
print("Chi-squared Test for pain_change_category across Treatment and Balanced groups")
print(f"Chi2: {chi2}, p-value: {p}, Degrees of Freedom: {dof}")
print("\nExpected Frequencies:")
print(ex)
print("\nObserved Frequencies:")
print(contingency_table)


Chi-squared Test for pain_change_category across Treatment and Balanced groups
Chi2: 320.6828443619327, p-value: 2.3152885620800252e-70, Degrees of Freedom: 2

Expected Frequencies:
[[ 433.28553488 2849.71446512]
 [ 115.61319785  760.38680215]
 [ 607.10126727 3992.89873273]]

Observed Frequencies:
Group                 Balanced  Treatment
pain_change_category                     
Decreased 20+ points       291       2992
Increased 20+ points       279        597
Maintained                 586       4014
