In [1]:
import pandas as pd
import numpy as np

# Load the dataset from a CSV file
sauna_2025 = pd.read_csv("Anonymised Community Sauna Annual 2024 Survey (Responses).csv")
print("Shape: ",sauna_2025.shape)

Shape:  (1799, 35)


# Cleaning

## Dropping sensitive data

In [2]:
drop_list = [
    """ID""", 
    """What is your name?""", 
    """What is your sexual orientation?""",
    """Overall, how satisfied are you with your life nowadays? (where 0 is “not at all” and 10 is “completely”)""",
    """Overall, to what extent do you feel that the things you do in your life are worthwhile? (where 0 is “not at all” and 10 is “completely”)""",
    """I’ve been feeling optimistic about the future""",
    """I’ve been feeling useful""",
    """I’ve been feeling relaxed""",
    """I’ve been dealing with problems well""",
    """I’ve been thinking clearly""",
    """I’ve been feeling close to other people""",
    """I’ve been able to make up my own mind about things""",
]

# dropping columns for anonymity and irrelevance to analysis
sauna_2025 = sauna_2025.drop(columns=drop_list, errors="ignore")
df = sauna_2025.drop(columns=sauna_2025.columns[-1], errors="ignore")

In [3]:
rename_map = {
    """What is your age group?""": "age",
    """What gender do you identify with?""": "gender",
    """How would you describe your ethnicity?""": "ethnicity",
    """Do you consider yourself to have a disability or long term health condition?""": "disability_health_condition",
    """Please indicate the following about your employment status:""": "employment_status",
    """Where do you currently live?""": "location",
    """How often do you visit our sauna?""": "visit_frequency",
    """Which of our sauna locations do you visit most often?""": "location_frequency",
    """Have you experienced any physical health improvements from using the sauna?""": "physical_health_improvements",
    """Have you experienced any mental health improvements from using the sauna?""": "mental_health_improvements",
    """How important is having regular access to sauna? (On a scale of 1 to 5, where 1 is low and 5 is high)""": "sauna_access_importance",
    """What do you enjoy most about the community sauna?""": "enjoyment_reasons",
    """Do you feel a sense of belonging or community when using the sauna? On a scale of 1 to 5, where 1 is low and 5 is high""": "belonging_rating",
    """Please share why community sauna is important to you?""": "importance_reason",
}


# renaming columns for quick reference
df = df.rename(columns=rename_map)

df

Unnamed: 0,Timestamp,age,gender,ethnicity,disability_health_condition,employment_status,What is your religious belief?,location,Had you been to a sauna before you came to Community Sauna?,How did you hear about the Community Sauna?,...,"How would you rate the overall ambiance and atmosphere of the sauna?\r\n\r\nOn a scale of 1 to 5, where 1 is low and 5 is high","Are the sauna prices reasonable and in line with your expectations?\r\n\r\nOn a scale of 1 to 5, where 1 is low and 5 is high",What improvements or additions would you like to see in the sauna facilities or services?,"Do you know of any specific location where we could set up a community sauna? (e.g., a park, unused building, or local space)","What improvements would you suggest for the facilities, services, or events?",Are you familiar with our social prescribing offers and services?,"If yes, how did you hear about it?",importance_reason,In which area of London would you like us to set up a sauna?,What topics would you like to hear more about from us in our communications? (Select all that apply)
0,45688.689685,25–34,Female,"White (e.g., British, Irish, other European ba...",Yes,Employed,Judaism,Central London,Once or twice,Word of mouth,...,5.0,5.0,,,,Yes,Social Media,,Central London,"Sauna health benefits, Not-for-profit work"
1,45688.689979,18–24,Male,"White (e.g., British, Irish, other European ba...",No,Employed,Atheism,South East London,Once or twice,Word of mouth,...,5.0,5.0,,,,Yes,Social Media,,South East London,
2,45688.690375,35–44,Male,"Asian or Asian British (e.g., Indian, Pakistan...",No,Employed,Agnosticism,South East London,I was already a regular sauna user,Word of mouth,...,4.0,4.0,,,,No,,,North East London,"Behind the scenes, Upcoming events"
3,45688.690694,25–34,Non-binary,"Mixed or multiple ethnic groups (e.g., White a...",No,Employed,Atheism,East London,Once or twice,Word of mouth,...,4.0,4.0,,,,Yes,Social Media,,Central London,"Sauna etiquette or best practices, Not-for-pro..."
4,45688.691168,45–54,Female,"White (e.g., British, Irish, other European ba...",No,Employed,Judaism,North West London,Once or twice,Word of mouth,...,5.0,3.0,,,,No,,,North West London,"Sauna health benefits, Upcoming events"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1794,45720.380779,25–34,Female,"White (e.g., British, Irish, other European ba...",No,Employed,Spiritual but not Religious,East London,Once or twice,Word of mouth,...,5.0,5.0,,,,No,,It is a great social activity to do as a group...,South East London,
1795,45720.450203,25–34,Female,"White (e.g., British, Irish, other European ba...",No,Employed,Atheism,South West London,Once or twice,Friend took me,...,4.0,5.0,"There wasn’t a light in the changing room, pho...",Unsure of a specific location but some (in/clo...,Changing room lighting,No,,I felt great after the session and would like ...,South West London,"Sauna health benefits, Sauna etiquette or best..."
1796,45720.463383,25–34,Female,"Mixed or multiple ethnic groups (e.g., White a...",No,Self employed,Agnosticism,West London,I was already a regular sauna user,Word of mouth,...,5.0,5.0,Offering some sort of salt scrub to pair well ...,,More advertising for events,No,,It feels like you’re distancing yourself from ...,North West London,"Behind the scenes, Sauna etiquette or best pra..."
1797,45720.918005,25–34,Female,"White (e.g., British, Irish, other European ba...",No,Employed,Atheism,East London,Once or twice,Word of mouth,...,5.0,5.0,,,,No,Social Media,"Improved mental health, makes me feel strong a...",,Sauna etiquette or best practices


# Analysis

In [4]:
# importing the one hot encoder from sklearn
from sklearn.preprocessing import OneHotEncoder

# creating encoder object
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# Selecting relevant columns for one-hot encoding
df_subset = df[["visit_frequency", "physical_health_improvements", "mental_health_improvements"]]
# remove rows where visit_frequency is NaN
df_subset = df_subset.dropna(subset=["visit_frequency"])

# Converting 'visit_frequency' to categorical type for one-hot encoding
df_pandas_encoded = pd.get_dummies(df_subset, columns=["visit_frequency"])
# One-hot encoding the 'visit_frequency' column
onehotenc = encoder.fit_transform(df_subset[['visit_frequency']])
one_hot_df = pd.DataFrame(onehotenc, columns=encoder.get_feature_names_out(['visit_frequency']))

# add the other columns using join 
df_subset = df_subset.join(one_hot_df)
df_subset = df_subset.drop(columns=["visit_frequency"])
df_subset

Unnamed: 0,physical_health_improvements,mental_health_improvements,visit_frequency_Daily,visit_frequency_Monthly,visit_frequency_Rarely,visit_frequency_Weekly
0,Yes,Yes,0.0,1.0,0.0,0.0
1,Not sure,Not sure,0.0,0.0,1.0,0.0
2,Not sure,Not sure,0.0,0.0,0.0,1.0
3,Yes,Yes,0.0,0.0,1.0,0.0
4,Yes,Yes,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...
1794,Yes,Yes,0.0,1.0,0.0,0.0
1795,Not sure,Not sure,,,,
1796,Not sure,Yes,,,,
1797,Not sure,Yes,,,,


In [6]:
# create an array summing the visit frequency columns for data integrity

visit_frequency_columns = [col for col in df_subset.columns if col.startswith('visit_frequency_')]
visit_frequency_sum = df_subset[visit_frequency_columns].sum(axis=1)
df_subset['visit_frequency_sum'] = visit_frequency_sum
df_subset

Unnamed: 0,physical_health_improvements,mental_health_improvements,visit_frequency_Daily,visit_frequency_Monthly,visit_frequency_Rarely,visit_frequency_Weekly,visit_frequency_sum
0,Yes,Yes,0.0,1.0,0.0,0.0,2.0
1,Not sure,Not sure,0.0,0.0,1.0,0.0,2.0
2,Not sure,Not sure,0.0,0.0,0.0,1.0,2.0
3,Yes,Yes,0.0,0.0,1.0,0.0,2.0
4,Yes,Yes,0.0,1.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...
1794,Yes,Yes,0.0,1.0,0.0,0.0,2.0
1795,Not sure,Not sure,,,,,0.0
1796,Not sure,Yes,,,,,0.0
1797,Not sure,Yes,,,,,0.0


### Statistical testing

In [7]:
from scipy.stats import chi2_contingency

# Performing Chi-square test of independence
# between frequency of sauna visits and physical health improvements
contingency = pd.crosstab(
    df['visit_frequency'],
    df['physical_health_improvements']
)

chi2, p, dof, expected = chi2_contingency(contingency)

print("Chi-square statistic:", chi2)
print("Degrees of freedom:", dof)
print("P-value:", p)


Chi-square statistic: 45.23648636310094
Degrees of freedom: 6
P-value: 4.2001503593526665e-08


In [9]:
# Performing Chi-square test of independence
# between frequency of sauna visits and mental health improvements
contingency = pd.crosstab(
    df['visit_frequency'],
    df['mental_health_improvements']
)

chi2, p, dof, expected = chi2_contingency(contingency)

print("Chi-square statistic:", chi2)
print("Degrees of freedom:", dof)
print("P-value:", p)


Chi-square statistic: 71.92858875832697
Degrees of freedom: 6
P-value: 1.6434556249618334e-13


### Paired t-test from intervention study

### Regression analysis

In [20]:
# Preparing columns for binary regression analysis
# Map 'Yes' to 1, 'No' to 0, leave others (e.g., 'Not sure', NaN) as NaN
binary_improvements = pd.DataFrame()
binary_improvements['physical_health_improvements'] = df['physical_health_improvements'].map({'Yes': 1, 'No': 0})
binary_improvements['mental_health_improvements'] = df['mental_health_improvements'].map({'Yes': 1, 'No': 0})
binary_improvements.dropna(inplace=True)
binary_improvements.shape

(1202, 2)

In [None]:
# import statsmodels for regression analysis
import statsmodels.api as sm
# Define the independent variables (X) and dependent variable (y)
X = df_subset.drop(columns=['physical_health_improvements', 'mental_health_improvements'])

### Topic modelling 
LMF and NDA topic modelling 