In [None]:
# Social Time Satisfaction Mapping

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the survey CSV. low_memory=False makes pandas parse the file in a single
# pass instead of in chunks, which avoids mixed-dtype inference within a column.
df = pd.read_csv('CSCS_data_anon.csv', low_memory=False)


# Preferred-time and actual-social-time columns. The two lists are paired by
# position: entry i of one list is compared with entry i of the other.
# NOTE(review): the last two preference names do not follow the pattern of the
# first four — confirm they match the CSV header exactly.
preference_time_columns = [
    'CONNECTION_preference_time_family',
    'CONNECTION_preference_time_friends',
    'CONNECTION_preference_time_coworkers_classmates',
    'CONNECTION_preference_time_neighbours',
    'CONNECTION_preference_time_coworkers_p7d',
    'CONNECTION_preference_classmates_p7d'
]

social_time_columns = [
    'CONNECTION_social_time_family_p7d',
    'CONNECTION_social_time_friends_p7d',
    'CONNECTION_social_time_coworkers_and_classmates_p7d',
    'CONNECTION_social_time_neighbours_p7d',
    'CONNECTION_social_time_coworkers_p7d',
    'CONNECTION_social_time_classmates_p7d'
]

# Short label for each (preference, social time) pair, in the same order as the
# lists above. The labels are spelled out because the column names do not share
# one pattern: taking the last "_"-separated token yields "classmates" for the
# third pair and "p7d" for both of the last two, which would name the output
# columns wrongly and make the last two pairs overwrite each other.
relationship_labels = [
    'family',
    'friends',
    'coworkers_classmates',
    'neighbours',
    'coworkers',
    'classmates'
]

# Add a "<column>_numeric" companion for every preference / social time column.
# NOTE(review): `time_mapping` is not defined in any cell visible here; it must
# already exist in the kernel — confirm where it is defined and move that
# definition above this cell so Restart & Run All works.
# pd.to_numeric(errors='coerce') turns any answer the mapping does not cover
# into NaN, so the subtraction below never mixes strings with numbers.
for col in preference_time_columns + social_time_columns:
    df[f"{col}_numeric"] = pd.to_numeric(df[col].replace(time_mapping), errors='coerce')

# Dissatisfaction = absolute gap between preferred and actual time, computed on
# the numeric companions (the raw columns hold the unmapped survey answers).
# assumes time_mapping places both column families on one numeric scale — TODO confirm
dissatisfaction_columns = []
for label, pref_col, soc_col in zip(relationship_labels, preference_time_columns, social_time_columns):
    col_name = f'dissatisfaction_{label}'
    gap = df[f'{pref_col}_numeric'] - df[f'{soc_col}_numeric']
    df[col_name] = gap.abs()  # same value as sqrt(gap ** 2), stated directly
    dissatisfaction_columns.append(col_name)

# Show the first few rows of the four measures used by the later cells
df[['dissatisfaction_family', 'dissatisfaction_friends', 'dissatisfaction_coworkers_classmates', 'dissatisfaction_neighbours']].head()

In [None]:
# Social Time Dissatisfaction: standardise each measure to a z-score.
# Computed with pandas rather than scipy.stats.zscore: `zscore` is never
# imported in this notebook, and with its default nan_policy it returns NaN for
# the whole column as soon as one value is missing. pandas' mean/std skip NaN,
# so only the rows that are themselves missing stay NaN.
# ddof=0 (population standard deviation) matches scipy.stats.zscore's default.
for measure in [
    'dissatisfaction_family',
    'dissatisfaction_friends',
    'dissatisfaction_coworkers_classmates',
    'dissatisfaction_neighbours',
]:
    values = df[measure]
    df[f'{measure}_z'] = (values - values.mean()) / values.std(ddof=0)

In [None]:
# Composite score: per-respondent average of the four standardised
# dissatisfaction measures. skipna=True averages over whichever of the four
# are present in a row instead of returning NaN when any one is missing.
z_cols = [
    'dissatisfaction_family_z',
    'dissatisfaction_friends_z',
    'dissatisfaction_coworkers_classmates_z',
    'dissatisfaction_neighbours_z',
]
df['Social_Time_Dissatisfaction_Composite'] = df[z_cols].mean(axis=1, skipna=True)

In [None]:
# Boolean flags for self-rated wellness: True where the answer is exactly
# 'poor', False for every other answer (including missing values).
# NOTE(review): the comparison is case-sensitive — confirm the survey encodes
# this level exactly as 'poor'.
df['physical_poor'] = df['wellness_physical'] == 'poor'
df['mental_poor'] = df['wellness_mental'] == 'poor'

# Columns inspected for a "Yes" answer: every column whose name mentions
# 'diagnosis' or 'treatment' (case-insensitive), plus two identity columns.
diagnosis_treatment_columns = [col for col in df.columns if 'diagnosis' in col.lower() or 'treatment' in col.lower()]
additional_columns = ['DEMO_identity_disability', 'DEMO_identity_mental_health']
columns_to_check = diagnosis_treatment_columns + additional_columns

# 'health_condition_or_disability' is True when any inspected column contains
# "yes" (case-insensitive substring match) for that respondent, else False.
# The match runs once per column rather than once per row, which gives the same
# result without building a temporary Series for every respondent.
# The 'Yes, currently' alternative is already covered by 'Yes'; the pattern is
# kept as written.
yes_matches = df[columns_to_check].astype(str).apply(
    lambda column: column.str.contains('Yes|Yes, currently', case=False, na=False)
)
df['health_condition_or_disability'] = yes_matches.any(axis=1)



In [None]:
# White-grid Seaborn styling for all panels below
sns.set(style="whitegrid")

# One entry per panel, in subplot order (left-to-right, top-to-bottom):
# (flag column, flag value to keep, panel title, bar colour)
panels = [
    ('physical_poor', True, "Social Time Dissatisfaction (Physical Poor=True)", "blue"),
    ('physical_poor', False, "Social Time Dissatisfaction (Physical Poor=False)", "green"),
    ('health_condition_or_disability', True, "Social Time Dissatisfaction (Health Condition=True)", "orange"),
    ('health_condition_or_disability', False, "Social Time Dissatisfaction (Health Condition=False)", "red"),
]

# 2x2 grid of histograms of the composite score, one per subgroup
fig, axes = plt.subplots(2, 2, figsize=(20, 12))

for ax, (flag_col, flag_value, panel_title, bar_color) in zip(axes.flat, panels):
    subgroup = df[df[flag_col] == flag_value]
    sns.histplot(
        data=subgroup,
        x='Social_Time_Dissatisfaction_Composite',
        kde=True,
        bins=20,
        color=bar_color,
        ax=ax,
    )
    ax.set_title(panel_title)
    ax.set_xlabel("Social Time Dissatisfaction Composite")
    ax.set_ylabel("Frequency")

# Extra padding so titles and axis labels of neighbouring panels do not collide
fig.tight_layout(pad=3)
plt.show()