# Demo

In [None]:
# Import statements
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.formula.api as smf

In [None]:
# Load the Canadian Social Connection Survey responses (`data`) and the
# companion table of variable names (`var`) straight from GitHub.
data = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/pointOfive/stat130chat130/main/CP/CSCS_data_anon.csv"
)
var = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/pointOfive/stat130chat130/main/CP/var_names.csv"
)

In [None]:
# Distinct 'text' entries in `var` for 'CONNECTION_social_num_close_friends' (numerical outcome)
var.loc[var['new_var'].eq('CONNECTION_social_num_close_friends'), 'text'].unique()

In [None]:
# Distinct 'text' entries in `var` for 'COVID_prevention_distancing' (categorical predictor)
var.loc[var['new_var'].eq('COVID_prevention_distancing'), 'text'].unique()

In [None]:
# Distinct 'text' entries in `var` for 'DEMO_student' (categorical predictor)
var.loc[var['new_var'].eq('DEMO_student'), 'text'].unique()

In [None]:
# Distinct 'text' entries in `var` for 'WORK_hours_per_week' (numerical predictor)
var.loc[var['new_var'].eq('WORK_hours_per_week'), 'text'].unique()

In [None]:
# Distinct 'text' entries in `var` for 'CONNECTION_preference_time_strangers' (numerical predictor)
var.loc[var['new_var'].eq('CONNECTION_preference_time_strangers'), 'text'].unique()

## 1.

| Variable | Column Label in `data`                  | Meaning(s) from `var`                                                                                                                             |
| -------- | --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| $Y_i$    | `'CONNECTION_social_num_close_friends'` | How many close friends do you have?                                                                                                               |
| $x_i$    | `'COVID_prevention_distancing'`         | To what extent are you currently following the COVID-19 prevention practices listed below? - Physically distance yourself by 2 metres from others |

In [None]:
# Data preparation: keep the outcome and the predictor, drop rows with missing
# values, then discard the 'Presented but no response' answers.
data_1 = (
    data[['CONNECTION_social_num_close_friends', 'COVID_prevention_distancing']]
    .dropna()
    .loc[lambda df: df['COVID_prevention_distancing'] != 'Presented but no response']
)

In [None]:
# Descriptive statistics for the numerical outcome 'CONNECTION_social_num_close_friends' in data_1
data_1.loc[:, 'CONNECTION_social_num_close_friends'].describe()

In [None]:
# Frequency of each level of the categorical predictor 'COVID_prevention_distancing' in data_1
data_1.loc[:, 'COVID_prevention_distancing'].value_counts()

In [None]:
# Fit an ordinary least squares model of the outcome on the categorical
# predictor, then display the coefficient table (second table of the summary).
fitted_model_1 = smf.ols(
    formula='CONNECTION_social_num_close_friends ~ COVID_prevention_distancing',
    data=data_1,
).fit()
fitted_model_1.summary().tables[1]

$$\hat{y}_i = 4.2317 + 0.0314 \times 1_{\left[ x_i = \text{"Somewhat closely"} \right]}(x_i) + 0.4807 \times 1_{\left[ x_i = \text{"Very closely"} \right]}(x_i)$$

In [None]:
# Predictor levels and the fitted value for each level.
# The first level is the baseline (intercept only); each remaining level adds
# its own coefficient to the intercept.
x_i = ['Not at all', 'Somewhat closely', 'Very closely']
y_hat_i = [
    fitted_model_1.params['Intercept'] + offset
    for offset in (
        0.0,
        fitted_model_1.params['COVID_prevention_distancing[T.Somewhat closely]'],
        fitted_model_1.params['COVID_prevention_distancing[T.Very closely]'],
    )
]

# Draw one marker per predictor level
fig = go.Figure(data=[go.Scatter(x=x_i, y=y_hat_i, mode='markers')])
fig.update_layout(
    title_text="Fitted Model 1",
    xaxis_title_text="COVID_prevention_distancing",
    yaxis_title_text="CONNECTION_social_num_close_friends",
)
fig.show()

## 2.

| Variable | Column Label in `data`                  | Meaning(s) from `var`                                                                                                 |
| -------- | --------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
| $Y_i$    | `'CONNECTION_social_num_close_friends'` | How many close friends do you have?                                                                                   |
| $x_i$    | `'DEMO_student'`                        | Are you currently a student?                                                                                          |
|          |                                         | Are you currently... (Check all that apply) - A student                                                               |
| $z_i$    | `'WORK_hours_per_week'`                 | During the COVID-19 pandemic, how many hours have you typically spent working for pay or in self-employment per week? |
|          |                                         | On average, how many hours per week do you work?                                                                      |

In [None]:
# Data preparation: keep the outcome and both predictors, drop rows with
# missing values, then discard the 'Presented but no response' answers.
data_2 = (
    data[['CONNECTION_social_num_close_friends', 'DEMO_student', 'WORK_hours_per_week']]
    .dropna()
    .loc[lambda df: df['DEMO_student'] != 'Presented but no response']
)

In [None]:
# Descriptive statistics for the numerical outcome 'CONNECTION_social_num_close_friends' in data_2
data_2.loc[:, 'CONNECTION_social_num_close_friends'].describe()

In [None]:
# Frequency of each level of the categorical predictor 'DEMO_student' in data_2
data_2.loc[:, 'DEMO_student'].value_counts()

In [None]:
# Descriptive statistics for the numerical predictor 'WORK_hours_per_week' in data_2
data_2.loc[:, 'WORK_hours_per_week'].describe()

In [None]:
# Fit an additive OLS model (one categorical plus one numerical predictor),
# then display the coefficient table (second table of the summary).
fitted_model_2 = smf.ols(
    formula='CONNECTION_social_num_close_friends ~ DEMO_student + WORK_hours_per_week',
    data=data_2,
).fit()
fitted_model_2.summary().tables[1]

$$\hat{y}_i = 3.8358 + 0.6840 \times 1_{\left[ x_i = \text{"Yes"} \right]}(x_i) + 0.0144 \times z_i$$

In [None]:
# Endpoints of the observed 'WORK_hours_per_week' range; each fitted line is
# straight, so its two endpoints are enough to draw it.
z_i_range = np.array(data_2['WORK_hours_per_week'].agg(['min', 'max']))

# Fitted line for non-students (baseline level of DEMO_student)
y_hat_i_range_for_x_i_No = (
    fitted_model_2.params['Intercept']
    + fitted_model_2.params['WORK_hours_per_week'] * z_i_range
)
# Fitted line for students: same slope, intercept shifted by the 'Yes' coefficient
y_hat_i_range_for_x_i_Yes = (
    fitted_model_2.params['Intercept']
    + fitted_model_2.params['DEMO_student[T.Yes]']
    + fitted_model_2.params['WORK_hours_per_week'] * z_i_range
)

# Draw both lines on one figure
fig = go.Figure(data=[
    go.Scatter(x=z_i_range, y=y_hat_i_range_for_x_i_No, mode='lines', name='DEMO_student = No'),
    go.Scatter(x=z_i_range, y=y_hat_i_range_for_x_i_Yes, mode='lines', name='DEMO_student = Yes'),
])
fig.update_layout(
    title_text="Fitted Model 2",
    xaxis_title_text="WORK_hours_per_week",
    yaxis_title_text="CONNECTION_social_num_close_friends",
)
fig.show()

## 3.

| Variable   | Column Label in `data`                   | Meaning(s) from `var`                                                                                                 |
| ---------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
| $Y_i$      | `'CONNECTION_social_num_close_friends'`  | How many close friends do you have?                                                                                   |
| $z_{1i}$   | `'WORK_hours_per_week'`                  | During the COVID-19 pandemic, how many hours have you typically spent working for pay or in self-employment per week? |
|            |                                          | On average, how many hours per week do you work?                                                                      |
| $z_{2i}$   | `'CONNECTION_preference_time_strangers'` | Ideally, how many hours per week would you like to spend socializing with each of the following groups? - Strangers   |

In [None]:
# Data preparation: keep the outcome and the two numerical predictors, and
# drop every row that has a missing value in any of them.
data_3 = data.loc[
    :,
    [
        'CONNECTION_social_num_close_friends',
        'WORK_hours_per_week',
        'CONNECTION_preference_time_strangers',
    ],
].dropna()

In [None]:
# Descriptive statistics for the numerical outcome 'CONNECTION_social_num_close_friends' in data_3
data_3.loc[:, 'CONNECTION_social_num_close_friends'].describe()

In [None]:
# Descriptive statistics for the numerical predictor 'WORK_hours_per_week' in data_3
data_3.loc[:, 'WORK_hours_per_week'].describe()

In [None]:
# Descriptive statistics for the numerical predictor 'CONNECTION_preference_time_strangers' in data_3
data_3.loc[:, 'CONNECTION_preference_time_strangers'].describe()

In [None]:
# Fit an additive OLS model with two numerical predictors, then display the
# coefficient table (second table of the summary).
fitted_model_3 = smf.ols(
    formula='CONNECTION_social_num_close_friends ~ WORK_hours_per_week + CONNECTION_preference_time_strangers',
    data=data_3,
).fit()
fitted_model_3.summary().tables[1]

$$\hat{y}_i = 4.9483 + 0.0029 \times z_{1i} + 0.1023 \times z_{2i}$$

In [None]:
# Predictors and predicted values
# The additive fit is a plane, so its four corners are enough to draw it.
# Build a 2x2 grid over the observed min/max of each predictor; with 'ij'
# indexing, rows vary WORK_hours_per_week and columns vary
# CONNECTION_preference_time_strangers.
z_1i_range, z_2i_range = np.meshgrid(
    [data_3['WORK_hours_per_week'].min(), data_3['WORK_hours_per_week'].max()],
    [data_3['CONNECTION_preference_time_strangers'].min(), data_3['CONNECTION_preference_time_strangers'].max()],
    indexing='ij',
)
y_hat_i_range = (
    fitted_model_3.params['Intercept']
    + fitted_model_3.params['WORK_hours_per_week'] * z_1i_range
    + fitted_model_3.params['CONNECTION_preference_time_strangers'] * z_2i_range
)

# Plot the fitted model
# Pass the full 2D coordinate grids: with 1D x/y, plotly reads z[i][j] as the
# height at (x[j], y[i]), which would transpose this row-major-in-x surface.
fig = go.Figure()
fig.add_trace(go.Surface(x=z_1i_range, y=z_2i_range, z=y_hat_i_range))
fig.update_layout(
    title="Fitted Model 3",
    width=700,
    height=700,
    scene=dict(
        xaxis_title="WORK_hours_per_week",
        yaxis_title="CONNECTION_preference_time_strangers",
        zaxis_title="CONNECTION_social_num_close_friends"
))
fig.show()

## 4.

| Variable | Column Label in `data`                  | Meaning(s) from `var`                                                                                                 |
| -------- | --------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
| $Y_i$    | `'CONNECTION_social_num_close_friends'` | How many close friends do you have?                                                                                   |
| $x_i$    | `'DEMO_student'`                        | Are you currently a student?                                                                                          |
|          |                                         | Are you currently... (Check all that apply) - A student                                                               |
| $z_i$    | `'WORK_hours_per_week'`                 | During the COVID-19 pandemic, how many hours have you typically spent working for pay or in self-employment per week? |
|          |                                         | On average, how many hours per week do you work?                                                                      |

In [None]:
# Fit an OLS model with main effects and their interaction (`*` in the
# formula) on data_2, then display the coefficient table.
fitted_model_4 = smf.ols(
    formula='CONNECTION_social_num_close_friends ~ DEMO_student * WORK_hours_per_week',
    data=data_2,
).fit()
fitted_model_4.summary().tables[1]

$$\hat{y}_i = 3.8467 + 0.0138 \times z_i + 0.5713 \times 1_{\left[ x_i = \text{"Yes"} \right]}(x_i) + 0.0054 \times z_i \times 1_{\left[ x_i = \text{"Yes"} \right]}(x_i)$$

$$\hat{y}_i = 3.8467 + 0.5713 \times 1_{\left[ x_i = \text{"Yes"} \right]}(x_i) + \left( 0.0138 + 0.0054 \times 1_{\left[ x_i = \text{"Yes"} \right]}(x_i) \right) \times z_i$$

In [None]:
# Endpoints of the observed 'WORK_hours_per_week' range; each fitted line is
# straight, so its two endpoints are enough to draw it.
z_i_range = np.array(data_2['WORK_hours_per_week'].agg(['min', 'max']))

# Fitted line for non-students (baseline level of DEMO_student)
y_hat_i_range_for_x_i_No = (
    fitted_model_4.params['Intercept']
    + fitted_model_4.params['WORK_hours_per_week'] * z_i_range
)
# Fitted line for students: the 'Yes' coefficient shifts the intercept and the
# interaction coefficient changes the slope.
y_hat_i_range_for_x_i_Yes = (
    fitted_model_4.params['Intercept']
    + fitted_model_4.params['DEMO_student[T.Yes]']
    + fitted_model_4.params['WORK_hours_per_week'] * z_i_range
    + fitted_model_4.params['DEMO_student[T.Yes]:WORK_hours_per_week'] * z_i_range
)

# Draw both lines on one figure
fig = go.Figure(data=[
    go.Scatter(x=z_i_range, y=y_hat_i_range_for_x_i_No, mode='lines', name='DEMO_student = No'),
    go.Scatter(x=z_i_range, y=y_hat_i_range_for_x_i_Yes, mode='lines', name='DEMO_student = Yes'),
])
fig.update_layout(
    title_text="Fitted Model 4",
    xaxis_title_text="WORK_hours_per_week",
    yaxis_title_text="CONNECTION_social_num_close_friends",
)
fig.show()

## 5.

| Variable   | Column Label in `data`                   | Meaning(s) from `var`                                                                                                 |
| ---------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
| $Y_i$      | `'CONNECTION_social_num_close_friends'`  | How many close friends do you have?                                                                                   |
| $z_{1i}$   | `'WORK_hours_per_week'`                  | During the COVID-19 pandemic, how many hours have you typically spent working for pay or in self-employment per week? |
|            |                                          | On average, how many hours per week do you work?                                                                      |
| $z_{2i}$   | `'CONNECTION_preference_time_strangers'` | Ideally, how many hours per week would you like to spend socializing with each of the following groups? - Strangers   |

In [None]:
# Fit an OLS model with both numerical predictors and their interaction (`*`
# in the formula) on data_3, then display the coefficient table.
fitted_model_5 = smf.ols(
    formula='CONNECTION_social_num_close_friends ~ WORK_hours_per_week * CONNECTION_preference_time_strangers',
    data=data_3,
).fit()
fitted_model_5.summary().tables[1]

$$\hat{y}_i = 4.8628 + 0.0061 \times z_{1i} + 0.1777 \times z_{2i} - 0.0023 \times z_{1i} \times z_{2i}$$

In [None]:
# Predictors and predicted values
# The interaction term makes the fitted surface curved, so evaluate it on a
# dense grid over the observed range of each predictor rather than only at
# the four corners.
n_samples = 101
x = np.linspace(data_3['WORK_hours_per_week'].min(), data_3['WORK_hours_per_week'].max(), n_samples)
y = np.linspace(data_3['CONNECTION_preference_time_strangers'].min(), data_3['CONNECTION_preference_time_strangers'].max(), n_samples)
# 'ij' indexing: rows vary WORK_hours_per_week, columns vary CONNECTION_preference_time_strangers
z_1i, z_2i = np.meshgrid(x, y, indexing='ij')
y_hat_i = (
    fitted_model_5.params['Intercept']
    + fitted_model_5.params['WORK_hours_per_week'] * z_1i
    + fitted_model_5.params['CONNECTION_preference_time_strangers'] * z_2i
    + fitted_model_5.params['WORK_hours_per_week:CONNECTION_preference_time_strangers'] * z_1i * z_2i
)

# Plot the fitted model
# Plot this cell's own predictions (`y_hat_i`) and pass the full 2D coordinate
# grids: with 1D x/y, plotly reads z[i][j] as the height at (x[j], y[i]),
# which would transpose this row-major-in-x surface.
fig = go.Figure()
fig.add_trace(go.Surface(x=z_1i, y=z_2i, z=y_hat_i))
fig.update_layout(
    title="Fitted Model 5",
    width=700,
    height=700,
    scene=dict(
        xaxis_title="WORK_hours_per_week",
        yaxis_title="CONNECTION_preference_time_strangers",
        zaxis_title="CONNECTION_social_num_close_friends"
))
fig.show()