In [11]:
# Import necessary libraries
import pandas as pd
import statsmodels.api as sm

# Step 1: read the raw turnover records from disk
df = pd.read_csv('/workspaces/Phuong5/1669242turnover.csv')

# One-hot encode the categorical columns. drop_first=True keeps one fewer
# indicator than there are categories, so the dummies are not perfectly
# collinear with the intercept (the "dummy variable trap").
df_encoded = pd.get_dummies(
    df,
    columns=['Disciplined', 'Social_drinker', 'Social_smoker'],
    drop_first=True,
)

# Response is Months_active; every remaining column is used as a predictor.
# Both sides are cast to float so no bool/object columns reach the estimator.
y = df_encoded['Months_active'].astype(float)
X = df_encoded.drop(columns=['Months_active']).astype(float)

# Prepend the intercept column explicitly before fitting.
X_const = sm.add_constant(X)

# Baseline OLS fit on the full predictor set; the printed summary is this
# cell's output.
model_initial = sm.OLS(y, X_const).fit()
print(model_initial.summary())

                            OLS Regression Results                            
Dep. Variable:          Months_active   R-squared:                       0.659
Model:                            OLS   Adj. R-squared:                  0.654
Method:                 Least Squares   F-statistic:                     115.1
Date:                Thu, 27 Mar 2025   Prob (F-statistic):          7.19e-145
Time:                        14:39:29   Log-Likelihood:                -1568.3
No. Observations:                 666   AIC:                             3161.
Df Residuals:                     654   BIC:                             3215.
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 83.0055     15

In [12]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Variance inflation factor for every column of the design matrix built in
# the previous cell (X_const, i.e. the predictors plus the intercept column).
# NOTE(review): the row for 'const' is computed like any other column but is
# presumably not a meaningful collinearity diagnostic - confirm before citing it.
design_matrix = X_const.values
vif_data = pd.DataFrame({
    'Variable': X_const.columns,
    'VIF': [
        variance_inflation_factor(design_matrix, col_idx)
        for col_idx in range(design_matrix.shape[1])
    ],
})

# Show the VIF table
print("VIF Values:")
print(vif_data)

VIF Values:
              Variable           VIF
0                const  23862.609957
1   Distance_from_work      1.669787
2                  Age      1.590323
3             Children      1.336165
4                 Pets      1.387897
5               Weight    163.386509
6               Height     30.034015
7                  BMI    147.525079
8         Absent_hours      1.076758
9      Disciplined_yes      1.079833
10  Social_drinker_yes      1.451719
11   Social_smoker_yes      1.215341


In [None]:
# Import the table-rendering helpers (pandas and statsmodels were imported in earlier cells)
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML

# Model 1: small baseline specification with three predictors.
# Relies on df_encoded and y created in the first modelling cell.
X1 = df_encoded[['Distance_from_work', 'Age', 'Children']].astype(float)
X1_const = sm.add_constant(X1)
model1 = sm.OLS(y, X1_const).fit()

# Model 2: every available predictor except BMI and Weight
# (presumably dropped because of their very large VIFs in the collinearity check).
X_reduced = df_encoded.drop(columns=['Months_active', 'BMI', 'Weight']).astype(float)
X_reduced_const = sm.add_constant(X_reduced)
model_reduced = sm.OLS(y, X_reduced_const).fit()

# Side-by-side regression table for the two models.
table = Stargazer([model1, model_reduced])
table.title("Table 1: Regression Results Predicting Months Active")
table.custom_columns(['Model 1', 'Model 2'], [1, 1])
# No explicit star toggle is set here: the previous `table.show_stars = True`
# only created an attribute that stargazer does not appear to read, so it had
# no effect. Significance stars are rendered by default.
# NOTE(review): stargazer's default star cutoffs are presumably 0.1/0.05/0.01,
# whereas APA-style tables usually use .05/.01/.001 - confirm, and call
# table.significance_levels([...]) if APA cutoffs are required.
table.rename_covariates({
    'const': 'Constant',
    'Disciplined_yes': 'Disciplined (Yes)',
    'Social_drinker_yes': 'Social Drinker (Yes)',
    'Social_smoker_yes': 'Social Smoker (Yes)'
})

# Last expression of the cell: render the table as HTML so the notebook displays it.
HTML(table.render_html())

0,1,2
,,
,Dependent variable: Months_active,Dependent variable: Months_active
,,
,Model 1,Model 2
,(1),(2)
,,
Absent_hours,,-0.006
,,(0.008)
Age,0.428***,0.345***
,(0.019),(0.018)
