3. Relationship Between Social Media Usage, Burnout, and Perceived Social Support in Different Age Groups

Hypothesis: Increased social media usage affects burnout and social support differently across age groups.
- CONNECTION_social_media_time_per_day: Measure how time spent on social media correlates with social support and burnout.
- PSYCH_zimet_multidimensional_social_support_scale_family_emotional: Analyze emotional support from family and its correlation with social media usage.
- WELLNESS_malach_pines_burnout_measure_hopeless: Investigate how social media impacts feelings of burnout.
- DEMO_age: Compare the impact of social media usage across age groups on burnout and social support.
- CONNECTION_social_num_friends: Study whether heavy social media use is linked with more perceived friends but also more burnout.
- LIFESTYLE_time_use_balance_exercising: Analyze whether individuals who spend more time on social media sacrifice exercise, exacerbating burnout.

In [2]:
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas as pd

# Survey items on perceived family support and burnout.
psych_wellness_columns = [
    "PSYCH_zimet_multidimensional_social_support_scale_family_emotional",
    "WELLNESS_malach_pines_burnout_measure_hopeless",
]

# Counts of people the respondent socialised with (past 7 days / past 3 months).
social_contact_columns = [
    "CONNECTION_social_num_friends_p7d",
    "CONNECTION_social_num_friends_p7d_grouped",
    "CONNECTION_social_num_close_friends_p3m",
    "CONNECTION_social_num_casual_friends_p3m",
    "CONNECTION_social_num_classmates_p3m",
    "CONNECTION_social_num_coworkers_p3m",
    "CONNECTION_social_num_friends_of_friends_p3m",
    "CONNECTION_social_num_acquaintances_p3m",
    "CONNECTION_social_num_other_people_p3m",
]

# Time-use item about exercising.
lifestyle_columns = ["LIFESTYLE_time_use_balance_exercising"]

# "DEMO_age" is deliberately not part of this list; the cells below read it
# directly from the frame when they need it (presumably as the grouping key).
columns_to_examine = (
    psych_wellness_columns + social_contact_columns + lifestyle_columns
)

# Full survey export. low_memory=False makes pandas read the file in one pass
# instead of chunk-by-chunk, which avoids mixed-dtype guesses per column.
original_df = pd.read_csv("CSCS_data_anon.csv", low_memory=False)

# Companion table loaded from var_names.csv (presumably the variable
# dictionary for the survey columns — confirm against the file).
variable_df = pd.read_csv("var_names.csv")


In [3]:
# Boolean row masks over the full survey frame.
is_cohort_row = original_df["SURVEY_collection_type"] == "cohort"
is_cross_row = original_df["SURVEY_collection_type"] == "cross"
is_retained_case = original_df["REMOVE_case"] == 'No'

# Cohort rows that are not flagged for removal; columns that end up entirely
# empty for this subset are discarded.
cohort = original_df.loc[is_cohort_row & is_retained_case].dropna(axis="columns", how="all")

# Cross-sectional rows, again without all-empty columns.
# NOTE(review): unlike `cohort`, this subset is not filtered on REMOVE_case —
# confirm that is intentional.
cross = original_df.loc[is_cross_row].dropna(axis="columns", how="all")


In [4]:
# Summary statistics for the examined columns in the cross-sectional sample.
# On a frame with mixed dtypes, describe() reports only the numeric columns
# unless told otherwise.
cross.loc[:, columns_to_examine].describe()

Unnamed: 0,CONNECTION_social_num_friends_p7d,CONNECTION_social_num_close_friends_p3m,CONNECTION_social_num_casual_friends_p3m,CONNECTION_social_num_classmates_p3m,CONNECTION_social_num_coworkers_p3m,CONNECTION_social_num_friends_of_friends_p3m,CONNECTION_social_num_acquaintances_p3m,CONNECTION_social_num_other_people_p3m
count,5154.0,4829.0,4776.0,564.0,2581.0,4704.0,4709.0,4223.0
mean,4.696352,6.918617,5.690117,6.030142,8.341728,3.604167,5.169038,3.741653
std,11.890125,18.514583,14.01393,16.469817,20.151775,10.881369,14.996781,12.339785
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
50%,2.0,3.0,2.0,2.0,3.0,1.0,1.0,0.0
75%,5.0,5.0,5.0,7.0,7.0,3.0,4.0,3.0
max,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0


In [5]:
# Number of respondents at each exact age (value_counts() skips missing ages).
age_counts_dict = cross["DEMO_age"].value_counts().to_dict()

print(sorted(age_counts_dict.items()))
youngest_age, oldest_age = min(age_counts_dict), max(age_counts_dict)
print(f"Min {youngest_age} Max {oldest_age}")
# 20th / 40th / 60th / 80th percentiles of age, printed on one line.
print(*cross["DEMO_age"].quantile([0.2, 0.4, 0.6, 0.8]))

# Split the sample into half-open decade bins [lo, lo + 10).
# The bin edges are derived from the observed ages so that respondents younger
# than 20 (or older than 99) are not silently left out of the per-bin summaries.
age_bin_width = 10
first_bin_start = int(youngest_age // age_bin_width) * age_bin_width
last_bin_end = (int(oldest_age // age_bin_width) + 1) * age_bin_width

cross_split_by_age = {
    f"{lo}:{lo + age_bin_width}": cross[
        (cross["DEMO_age"] >= lo) & (cross["DEMO_age"] < lo + age_bin_width)
    ]
    for lo in range(first_bin_start, last_bin_end, age_bin_width)
}

# Per-bin summary statistics of the examined columns.
for age_range, age_subset in cross_split_by_age.items():
    print(f"---------- AGE RANGE {age_range} ----------")
    print(age_subset[columns_to_examine].describe())

[(16.0, 22), (17.0, 24), (18.0, 112), (19.0, 60), (20.0, 113), (21.0, 103), (22.0, 140), (23.0, 180), (24.0, 177), (25.0, 341), (26.0, 296), (27.0, 185), (28.0, 386), (29.0, 205), (30.0, 357), (31.0, 170), (32.0, 276), (33.0, 241), (34.0, 171), (35.0, 240), (36.0, 219), (37.0, 119), (38.0, 158), (39.0, 132), (40.0, 144), (41.0, 105), (42.0, 128), (43.0, 105), (44.0, 96), (45.0, 128), (46.0, 107), (47.0, 103), (48.0, 91), (49.0, 106), (50.0, 142), (51.0, 116), (52.0, 125), (53.0, 132), (54.0, 150), (55.0, 163), (56.0, 159), (57.0, 177), (58.0, 179), (59.0, 168), (60.0, 199), (61.0, 196), (62.0, 229), (63.0, 220), (64.0, 214), (65.0, 186), (66.0, 162), (67.0, 160), (68.0, 149), (69.0, 155), (70.0, 164), (71.0, 126), (72.0, 118), (73.0, 150), (74.0, 98), (75.0, 119), (76.0, 72), (77.0, 71), (78.0, 65), (79.0, 64), (80.0, 58), (81.0, 39), (82.0, 38), (83.0, 21), (84.0, 23), (85.0, 20), (86.0, 13), (87.0, 5), (88.0, 6), (89.0, 10), (90.0, 6), (91.0, 8), (92.0, 2), (93.0, 1), (96.0, 2)]
Min 

In [6]:
# Names of the examined columns that pandas stores with a numeric dtype.
examined_cross = cross.loc[:, columns_to_examine]
numeric_columns = examined_cross.select_dtypes(include="number").columns

# One row per distinct DEMO_age value, holding the mean of each numeric column.
age_group_means = (
    cross
    .groupby("DEMO_age")[numeric_columns]
    .mean()
)
print(age_group_means)

          CONNECTION_social_num_friends_p7d  \
DEMO_age                                      
16.0                               4.888889   
17.0                               3.615385   
18.0                               3.456522   
19.0                               4.342105   
20.0                               7.800000   
...                                     ...   
90.0                               2.600000   
91.0                               2.400000   
92.0                               5.000000   
93.0                                    NaN   
96.0                               7.500000   

          CONNECTION_social_num_close_friends_p3m  \
DEMO_age                                            
16.0                                     4.625000   
17.0                                     3.500000   
18.0                                     7.785714   
19.0                                     5.200000   
20.0                                     7.923077   
...              

In [7]:
import numpy as np
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from scipy.stats import gaussian_kde

# Select only numeric columns from columns_to_examine
numeric_columns = cross[columns_to_examine].select_dtypes(include='number').columns

# Group by 'DEMO_age' and calculate the mean for the numeric columns
age_group_means = cross.groupby("DEMO_age")[numeric_columns].mean()
print(age_group_means)

# Two panels per row. Ceiling division gives exactly the rows needed, so an even
# number of columns does not leave an empty trailing row in the figure.
n_rows = max(1, (len(numeric_columns) + 1) // 2)
fig = make_subplots(rows=n_rows, cols=2, subplot_titles=numeric_columns)

# Width (in consecutive observed ages) of the centred window used for the
# smoothed trend line.
smoothing_window = 5

# One panel per numeric column: per-age mean plus a smoothed trend
for i, column in enumerate(numeric_columns):
    print(f"{i = } {column = }")
    panel_row, panel_col = i // 2 + 1, i % 2 + 1

    # Drop ages with no data for this column and take x and y from the SAME
    # series, so every mean stays paired with its own age.
    mean_by_age = age_group_means[column].dropna()
    ages = mean_by_age.index
    values = mean_by_age

    # Per-age means
    fig.add_trace(
        go.Scatter(
            x=ages,  # Age as x-axis
            y=values,  # Mean values as y-axis
            mode='lines+markers',
            name=f'{column} Mean',
            line=dict(color='blue')
        ),
        row=panel_row,
        col=panel_col,
    )

    # Smoothed trend of the per-age means. A density estimate of the mean
    # values has no meaning on an age axis, so the overlay is a centred rolling
    # mean, which stays in the same units as the blue line.
    smoothed = mean_by_age.rolling(window=smoothing_window, center=True, min_periods=1).mean()

    fig.add_trace(
        go.Scatter(
            x=ages,
            y=smoothed,
            mode='lines',
            name=f'{column} Smoothed',
            line=dict(color='red')
        ),
        row=panel_row,
        col=panel_col,
    )

# Update layout for better visualization
fig.update_layout(
    title_text="Mean of Social Connection Metrics by Age Group",
    showlegend=False,
    height=1200,
    width=1200
)

# Show the figure
fig.show()


          CONNECTION_social_num_friends_p7d  \
DEMO_age                                      
16.0                               4.888889   
17.0                               3.615385   
18.0                               3.456522   
19.0                               4.342105   
20.0                               7.800000   
...                                     ...   
90.0                               2.600000   
91.0                               2.400000   
92.0                               5.000000   
93.0                                    NaN   
96.0                               7.500000   

          CONNECTION_social_num_close_friends_p3m  \
DEMO_age                                            
16.0                                     4.625000   
17.0                                     3.500000   
18.0                                     7.785714   
19.0                                     5.200000   
20.0                                     7.923077   
...              

In [8]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Select only numeric columns from columns_to_examine
numeric_columns = cross[columns_to_examine].select_dtypes(include='number').columns

# Two panels per row. Ceiling division gives exactly the rows needed, so an even
# number of columns does not leave an empty trailing row in the figure.
n_rows = max(1, (len(numeric_columns) + 1) // 2)
fig = make_subplots(rows=n_rows, cols=2, subplot_titles=numeric_columns)

# Age of each respondent; shared x-axis data for every panel.
ages = cross["DEMO_age"]

# One scatter panel per numeric column
for i, column in enumerate(numeric_columns):
    print(f"{i = } {column = }")

    # Raw per-respondent values of this column (y-axis)
    values = cross[column]

    fig.add_trace(
        go.Scatter(
            x=ages,
            y=values,
            mode='markers',
            name=f'{column} Count',
            marker=dict(color='blue', size=5, opacity=0.6)
        ),
        row=i // 2 + 1,
        col=i % 2 + 1,
    )

# Update layout for better visualization
fig.update_layout(
    title_text="Social Connection Metrics by Age (Scatter Plot)",
    showlegend=False,
    height=1200,
    width=1200
)

# Show the figure
fig.show()


i = 0 column = 'CONNECTION_social_num_friends_p7d'
i = 1 column = 'CONNECTION_social_num_close_friends_p3m'
i = 2 column = 'CONNECTION_social_num_casual_friends_p3m'
i = 3 column = 'CONNECTION_social_num_classmates_p3m'
i = 4 column = 'CONNECTION_social_num_coworkers_p3m'
i = 5 column = 'CONNECTION_social_num_friends_of_friends_p3m'
i = 6 column = 'CONNECTION_social_num_acquaintances_p3m'
i = 7 column = 'CONNECTION_social_num_other_people_p3m'


In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def _rows_within_age_fences(data, column, whisker=1.5):
    """Keep rows whose ``column`` value lies inside its age group's IQR fences.

    For every distinct ``DEMO_age`` the 25th and 75th percentiles of ``column``
    are computed, and a row is kept when its value is within
    ``[Q1 - whisker * IQR, Q3 + whisker * IQR]`` of its own age group.

    Parameters:
        data: frame containing ``DEMO_age`` and ``column``.
        column: name of the column to screen for outliers.
        whisker: multiple of the IQR added beyond each quartile.

    Returns:
        The subset of ``data`` (all columns, original index) that passes.
    """
    by_age = data.groupby("DEMO_age")[column]
    # transform() broadcasts each group's quartile back onto its member rows.
    q1 = by_age.transform(lambda s: s.quantile(0.25))
    q3 = by_age.transform(lambda s: s.quantile(0.75))
    reach = whisker * (q3 - q1)
    inside = (data[column] >= q1 - reach) & (data[column] <= q3 + reach)
    return data[inside]


# Select only numeric columns from columns_to_examine
numeric_columns = cross[columns_to_examine].select_dtypes(include='number').columns

# Two panels per row. Ceiling division gives exactly the rows needed, so an even
# number of columns does not leave an empty trailing row in the figure.
n_rows = max(1, (len(numeric_columns) + 1) // 2)
fig = make_subplots(rows=n_rows, cols=2, subplot_titles=numeric_columns)

# One scatter panel per numeric column, with per-age outliers removed
for i, column in enumerate(numeric_columns):
    print(f"{i = } {column = }")

    # Rows that have both an age and a value for this column
    observed = cross.dropna(subset=["DEMO_age", column])

    # Drop values outside the IQR fences of their own age group
    filtered_data = _rows_within_age_fences(observed, column).reset_index(drop=True)

    # x and y both come from the filtered rows so the points stay paired
    fig.add_trace(
        go.Scatter(
            x=filtered_data["DEMO_age"],
            y=filtered_data[column],
            mode='markers',
            name=f'{column} Count',
            marker=dict(color='blue', size=5, opacity=0.6)
        ),
        row=i // 2 + 1,
        col=i % 2 + 1,
    )

# Update layout for better visualization
fig.update_layout(
    title_text="Social Connection Metrics by Age (Scatter Plot)",
    showlegend=False,
    height=1200,
    width=1200
)

# Show the figure
fig.show()


i = 0 column = 'CONNECTION_social_num_friends_p7d'






i = 1 column = 'CONNECTION_social_num_close_friends_p3m'






i = 2 column = 'CONNECTION_social_num_casual_friends_p3m'






i = 3 column = 'CONNECTION_social_num_classmates_p3m'






i = 4 column = 'CONNECTION_social_num_coworkers_p3m'






i = 5 column = 'CONNECTION_social_num_friends_of_friends_p3m'






i = 6 column = 'CONNECTION_social_num_acquaintances_p3m'






i = 7 column = 'CONNECTION_social_num_other_people_p3m'






In [10]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Select only numeric columns from columns_to_examine
numeric_columns = cross[columns_to_examine].select_dtypes(include='number').columns

# Two panels per row. Ceiling division gives exactly the rows needed, so an even
# number of columns does not leave an empty trailing row in the figure.
n_rows = max(1, (len(numeric_columns) + 1) // 2)
fig = make_subplots(rows=n_rows, cols=2, subplot_titles=numeric_columns)

# Age of each respondent; shared x-axis data for every panel.
ages = cross["DEMO_age"]

# One scatter panel per numeric column
for i, column in enumerate(numeric_columns):
    print(f"{i = } {column = }")

    # Raw per-respondent values of this column
    values = cross[column]

    # The counts include 0, which has no position on a log axis and would be
    # dropped from the plot. Shifting by one keeps those respondents visible;
    # the y-axis title below states the shift.
    shifted_values = values + 1

    fig.add_trace(
        go.Scatter(
            x=ages,
            y=shifted_values,
            mode='markers',
            name=f'{column} Count',
            marker=dict(color='blue', size=5, opacity=0.6)
        ),
        row=i // 2 + 1,
        col=i % 2 + 1,
    )

# Update layout for better visualization
fig.update_layout(
    title_text="Social Connection Metrics by Age (Scatter Plot with Log Scale)",
    showlegend=False,
    height=1200,
    width=1200,
)

# Without row/col arguments update_yaxes applies to every subplot's y-axis, so
# no per-row loop is needed.
fig.update_yaxes(type="log", title_text="count + 1")

# Show the figure
fig.show()


i = 0 column = 'CONNECTION_social_num_friends_p7d'
i = 1 column = 'CONNECTION_social_num_close_friends_p3m'
i = 2 column = 'CONNECTION_social_num_casual_friends_p3m'
i = 3 column = 'CONNECTION_social_num_classmates_p3m'
i = 4 column = 'CONNECTION_social_num_coworkers_p3m'
i = 5 column = 'CONNECTION_social_num_friends_of_friends_p3m'
i = 6 column = 'CONNECTION_social_num_acquaintances_p3m'
i = 7 column = 'CONNECTION_social_num_other_people_p3m'


In [11]:
# The two survey items inspected in the cohort sample:
# family emotional support and feeling hopeless (burnout measure).
cohort_to_examine = [
    "PSYCH_zimet_multidimensional_social_support_scale_family_emotional",
    "WELLNESS_malach_pines_burnout_measure_hopeless",
]

# Summary of both items for the cohort rows.
cohort.loc[:, cohort_to_examine].describe()

Unnamed: 0,PSYCH_zimet_multidimensional_social_support_scale_family_emotional,WELLNESS_malach_pines_burnout_measure_hopeless
count,442,441
unique,8,8
top,Agree,Sometimes
freq,99,103


In [12]:
# Show the responses to the two cohort items (pandas truncates long frames
# when rendering).
cohort.loc[:, cohort_to_examine]

Unnamed: 0,PSYCH_zimet_multidimensional_social_support_scale_family_emotional,WELLNESS_malach_pines_burnout_measure_hopeless
21,Very Strongly Agree,Never
71,Agree,Never
77,Neither Agree Nor Disagree,Almost never
80,Neither Agree Nor Disagree,Rarely
105,Strongly Agree,Almost never
...,...,...
11366,Disagree,Sometimes
11373,Strongly Disagree,Always
11380,Very Strongly Disagree,Often
11412,Strongly Agree,Rarely


In [13]:
# NOTE(review): this rebinds `cohort` to the full, unfiltered survey frame,
# replacing the subset built earlier from SURVEY_collection_type == "cohort"
# and REMOVE_case == 'No'. Any earlier cell that reads `cohort` will see every
# survey row if it is re-run after this line — confirm this is intended, or
# bind the unfiltered frame to a different name.
cohort = original_df