In [8]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [9]:
df = pd.read_stata('survey_data_combined.dta')

In [10]:
# Set 'in_followup_survey' based on 'surv2016'
df['in_followup_survey'] = df['surv2016']

  df['in_followup_survey'] = df['surv2016']


In [11]:
# Generate 'working' column based on 'employed'
df['working'] = df.groupby('employed').ngroup()

# Remove rows where 'working' is 1
df = df[df['working'] != 1]

# Convert 'income' and 'percent_chance_fired' to numeric
df['income'] = pd.to_numeric(df['income'], errors='coerce')
df['percent_chance_fired'] = pd.to_numeric(df['percent_chance_fired'], errors='coerce')

# Rename columns
df.rename(columns={'av_an_inc_earnings_colleagues': 'percent_increase_colleagues',
                   'percent_colleagues_laid_off': 'colleagues_laid_off'}, inplace=True)

# Clean and convert percentage columns
percent_columns = ['colleagues_laid_off', 'percent_pay_bonus', 'percent_colleagues_male', 'percent_increase_colleagues']
for col in percent_columns:
    df[col] = df[col].str.replace('%', '').replace('', np.nan).astype(float)
    
# Generate 'futureearning' column
disc = 0.95  # example value for discount rate
df['futureearning'] = 0
for y in range(1, 31):
    df['futureearning'] += (disc ** y) * np.log(df['income'] * (1 + df['colleagues_laid_off'] / 100) ** y)

# Clean 'hours_per_week_self' column
df['hours_per_week_self'] = df['hours_per_week_self'].str[:2]
df['hours_per_week_self'] = pd.to_numeric(df['hours_per_week_self'], errors='coerce')

# Convert 'part_time_option' to numeric
df['part_time_option'] = df['part_time_option'].map({'Yes': 1, 'No': 0})

# Remove rows with missing follow-up observations
df.drop(df[df['id_7d'] == 8294018].index, inplace=True)

# Generate 'pat_mat_leave' and 'flexible_work' columns
df['pat_mat_leave'] = (~df['maternity_leave'].isna()) | (~df['paternity_leave'].isna())
df['flexible_work'] = (~df['flexible_work_options'].isna())

# Replace missing values in 'pat_mat_leave' and 'flexible_work' with 0
df['pat_mat_leave'].fillna(0, inplace=True)
df['flexible_work'].fillna(0, inplace=True)

# Generate 'parttime_flex_option' column
df['parttime_flex_option'] = (df['flexible_work'] == 1) | (df['part_time_option'] == 1)
df['parttime_flex_option'].fillna(0, inplace=True)

# Convert 'earnings_expected_30' to numeric
df['earnings_expected_30'] = pd.to_numeric(df['earnings_expected_30'], errors='coerce')

# Convert 'income' and 'percent_chance_fired' to numeric, coercing errors to NaN
df['income'] = pd.to_numeric(df['income'], errors='coerce')
df['percent_chance_fired'] = pd.to_numeric(df['percent_chance_fired'], errors='coerce')


  df['working'] = df.groupby('employed').ngroup()
  df['futureearning'] = 0
  result = getattr(ufunc, method)(*inputs, **kwargs)
  df['pat_mat_leave'] = (~df['maternity_leave'].isna()) | (~df['paternity_leave'].isna())
  df['flexible_work'] = (~df['flexible_work_options'].isna())
  df['parttime_flex_option'] = (df['flexible_work'] == 1) | (df['part_time_option'] == 1)


In [12]:
# Load the dataset to merge
df_to_merge = pd.read_stata('indv_beta_wtp_percent_model1.dta')

# Merge the datasets on the 'id_7d' column
df = pd.merge(df, df_to_merge, on='id_7d', how='inner')

In [13]:
# Global variables
redo_bootstrap = 1

# Local variables
redo_descriptive_analysis = 1
redo_percentiles_betas_wtps = 1
redo_Table_6 = 1
redo_heterogeneity = 1
redo_followup_analysis = 1
redo_delta_analysis = 1
redo_expectations_regs = 1
redo_attributes = 1

In [14]:
# Convert parttime flex option to 0-100 scale
df['parttime_flex_option'] = df['parttime_flex_option'] * 100

In [15]:
# Rename columns
df.rename(columns={'parttime_flex_option': 'actual_parttime',
                   'percent_chance_fired': 'actual_fired',
                   'percent_pay_bonus': 'actual_bonus',
                   'percent_colleagues_male': 'actual_fracmale',
                   'percent_increase_colleagues': 'actual_raise',
                   'hours_per_week_self': 'actual_hours',
                   'futureearning': 'actual_futureearning'}, inplace=True)


In [25]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Define global variables
vars_list = ['fired', 'bonus', 'fracmale', 'raise', 'hours', 'parttime']

# Set seed
np.random.seed(1)

# Generate random numbers
df['rand'] = np.random.uniform(0, 1, len(df))

# Loop through variables
for var in vars_list:
    # Group by in_followup_survey and calculate rank for wtp_var
    df[f'rank_wtp_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'wtp_{var}')['rand'].rank(method='first')
    
    # Group by in_followup_survey and calculate rank for actual_var
    df[f'rank_actual_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'actual_{var}')['rand'].rank(method='first')
    
    # Perform regression
    X = df.loc[df['in_followup_survey'] == 1, f'rank_wtp_{var}']
    y = df.loc[df['in_followup_survey'] == 1, f'rank_actual_{var}']
    X = sm.add_constant(X)  # Add constant term for intercept
    model = sm.OLS(y, X).fit()
    print(model.summary())


                            OLS Regression Results                            
Dep. Variable:      rank_actual_fired   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 04 Apr 2024   Prob (F-statistic):                nan
Time:                        20:51:41   Log-Likelihood:                -195.11
No. Observations:                  59   AIC:                             392.2
Df Residuals:                      58   BIC:                             394.3
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
rank_wtp_fired     6.9831      0.867      8.

  df['rand'] = np.random.uniform(0, 1, len(df))
  df[f'rank_wtp_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'wtp_{var}')['rand'].rank(method='first')
  df[f'rank_actual_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'actual_{var}')['rand'].rank(method='first')
  df[f'rank_wtp_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'wtp_{var}')['rand'].rank(method='first')
  df[f'rank_actual_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'actual_{var}')['rand'].rank(method='first')
  df[f'rank_wtp_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'wtp_{var}')['rand'].rank(method='first')
  df[f'rank_actual_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'actual_{var}')['rand'].rank(method='first')
  df[f'rank_wtp_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'wtp_{var}')['rand'].rank(method='first')
  df[f'rank_actual_{var}'] = df[df['in_followup_survey'] == 1].groupby(f'actual_{var}')['rand'].rank(method='first')
  df[f'rank_wtp_{var}'] = df[df['in_foll