In [12]:
# import libraries
# Carson Batchelor
import pandas as pd
import numpy as np

In [17]:
# load the data and examine it
performance_data = pd.read_csv("performance.csv", delimiter='\t')
quit_data = pd.read_csv("quit_data.csv", delimiter='\t')
wfh_data = pd.read_csv("wfh.csv")

print("Performance Data:\n", performance_data.head())
print("Quit Data:\n", quit_data.head())
print("WFH Data:\n", wfh_data.head())

Performance Data:
    treat perf_2022h1
0      0           B
1      0           B
2      1           A
3      0           B
4      1           D
Quit Data:
         treat         role  male             live_far  quit
0   Treatment      Manager  Male   Commute <= 90 mins     0
1   Treatment      Manager  Male   Commute >  90 mins     0
2     Control  Non-manager  Male   Commute <= 90 mins     0
3     Control  Non-manager  Male   Commute >  90 mins     0
4     Control  Non-manager  Male   Commute <= 90 mins     0
WFH Data:
    treat  volunteer  takeuprate  date
0      0          0    0.004016     0
1      1          0    0.454183     0
2      0          1    0.001887     0
3      1          1    0.269450     0
4      0          0    0.004032     2


In [19]:
quit_data['treat'] = quit_data['treat'].map({' Treatment': 1, 'Control': 0})
quit_data['role'] = quit_data['role'].map({'Manager': 1, 'Non-manager': 0})
quit_data['male'] = quit_data['male'].map({'Male': 1, 'Female': 0})
quit_data['live_far'] = quit_data['live_far'].map({' Commute >  90 mins': 1, 'Commute <= 90 mins': 0})
performance_data['perf_2022h1'] = performance_data['perf_2022h1'].map({"D":0, "C":1, "B":2, "B+":3, "A":4})

In [20]:
# Check the distribution of characteristics by treatment (treat == 1 for treatment, 0 for control)
balance_quit = quit_data.groupby('treat').agg(
    mean_role=('role', 'mean'),
    mean_male=('male', 'mean'),
    mean_liveFar=('live_far', 'mean'),
).reset_index()
print("balance quit data:\n", balance_quit, "\n")

balance_performance = performance_data.groupby('treat').agg(
    mean_perf_2022h1=('perf_2022h1', 'mean'),
).reset_index()
print("balance performance data:\n", balance_performance, "\n")


balance_wfh = wfh_data.groupby('treat').agg(
    mean_volunteer=('volunteer', 'mean'),
).reset_index()
print("balance wfh data:\n", balance_wfh, "\n")

balance quit data:
    treat  mean_role  mean_male  mean_liveFar
0      0   0.247561   0.654878      0.471698
1      1   0.242424   0.637626      0.499197 

balance performance data:
    treat  mean_perf_2022h1
0      0          2.547724
1      1          2.513353 

balance wfh data:
    treat  mean_volunteer
0      0             0.5
1      1             0.5 



In [21]:
# Did the Intervention Affect WFH Rates?
wfh_rates = wfh_data.groupby('treat')['takeuprate'].mean().reset_index()
print(wfh_rates)

   treat  takeuprate
0      0    0.003527
1      1    0.499694


In [25]:
def make_ate_table(data, outcome, out_cols):
  table = data.groupby("treat")[outcome].agg(
        mean = 'mean' ,
        std = 'std' ,
        N = 'count',
        sem = "sem")

  # Confidence intervals
  table['lb'] = table['mean'] - 1.96 * table['sem']
  table['ub'] = table['mean'] + 1.96 * table['sem']
  table["error_bar"] = table["sem"]*1.96

  # ATE calculation: difference between treatment and control means
  table['ate'] = table['mean'] - table['mean'][0]
  table['se_ate'] = np.sqrt(table['sem']**2 + table['sem'][0]**2)
  table['ate_lb'] = table['ate'] - 1.96 * table['se_ate']
  table['ate_ub'] = table['ate'] + 1.96 * table['se_ate']

  return table[out_cols]

In [44]:
# Effect on Quit rate
cols = ["mean", "ate", "se_ate", "ate_lb", "ate_ub"]
quit_table = make_ate_table(quit_data, "quit", cols)
lb = quit_table["ate_lb"][1]
ub = quit_table["ate_ub"][1]
print(quit_table)
print("Quit Rate Effect:", quit_table["ate"][1])
print(f"95% Confidence Interval for Quit Rate Effect: ({lb},{ub})")

           mean       ate    se_ate    ate_lb    ate_ub
treat                                                  
0      7.195122  0.000000  1.276961 -2.502843  2.502843
1      4.797980 -2.397142  1.180162 -4.710260 -0.084024
Quit Rate Effect: -2.397142153239715
95% Confidence Interval for Quit Rate Effect: (-4.710259829120324,-0.08402447735910545)


In [48]:
# Effect on performance
cols = ["mean", "ate", "se_ate", "ate_lb", "ate_ub"]
perf_table = make_ate_table(performance_data, "perf_2022h1", cols)
lb = perf_table["ate_lb"][1]
ub = perf_table["ate_ub"][1]
print(perf_table)
print("Performance Effect:", perf_table["ate"][1])
print(f"95% Confidence Interval for Performance Effect: ({lb},{ub})")

           mean       ate    se_ate    ate_lb    ate_ub
treat                                                  
0      2.547724  0.000000  0.044509 -0.087238  0.087238
1      2.513353 -0.034371  0.044486 -0.121564  0.052822
Performance Effect: -0.034370819662130625
95% Confidence Interval for Performance Effect: (-0.12156387505698371,0.05282223573272246)


In [51]:
# Calculate differential attrition
attrition_treatment = quit_data[quit_data['treat'] == 1]['quit'].mean()
attrition_control = quit_data[quit_data['treat'] == 0]['quit'].mean()
differential_attrition = attrition_treatment - attrition_control
print("Differential Attrition:", differential_attrition)

# Sort the control group by performance
control_group = performance_data[performance_data['treat'] == 0].sort_values(by='perf_2022h1')

# Trim the control group for upper bound
upper_cut = int(len(control_group) * (1 - differential_attrition))
mean_control_upper = control_group.iloc[:upper_cut]['perf_2022h1'].mean()

# Trim the control group for lower bound
lower_cut = int(len(control_group) * differential_attrition)
mean_control_lower = control_group.iloc[lower_cut:]['perf_2022h1'].mean()

# Calculate treatment group mean
mean_treatment = performance_data[performance_data['treat'] == 1]['perf_2022h1'].mean()

# Calculate Lee bounds for ITT
itt_upper = mean_treatment - mean_control_upper
itt_lower = mean_treatment - mean_control_lower

print("Lee bounds for ITT (Performance):", itt_lower, "-", itt_upper)


Differential Attrition: -2.397142153239715
Lee bounds for ITT (Performance): -0.034370819662130625 - -0.034370819662130625
