In [3]:
# import numpy, statsmodels.stats.power, pandas
import numpy as np
import statsmodels.stats.power as smp
import pandas as pd

In [72]:
# 1A
mean_click_rate = 0.05
new_click_rate = mean_click_rate + (0.1 * mean_click_rate)
effect_size = new_click_rate - mean_click_rate
alpha = 0.05
power = 0.90

required_sample_size = smp.NormalIndPower().solve_power(
    effect_size = effect_size,
    power = power,
    alpha = alpha
)
print("Required Sample size per group:", required_sample_size)
print("Total required sample size:", required_sample_size * 2)

Required Sample size per group: 840593.5527752708
Total required sample size: 1681187.1055505415


In [73]:
# 1B
n_treatment = 3200
mde_std = smp.tt_ind_solve_power(
    nobs1 = n_treatment,
    alpha = alpha,
    power = power
)

std_treatment = np.sqrt( .10*(1-.10) )

print("Minimum detectable effect: ", mde_std)
print("MDE: ", mde_std*std_treatment)

Minimum detectable effect:  0.08104803053685428
MDE:  0.024314409161056285


In [74]:
# 1C
power = smp.tt_ind_solve_power(
    effect_size=effect_size,
    nobs1=n_treatment,
    alpha=alpha
)

print("Power to detect an effect equal to 10% of the mean: ", power)

Power to detect an effect equal to 10% of the mean:  0.05459330260010606


In [76]:
# 1D
baseline_click_rate = 0.05
new_click_rate = 0.055
alpha = 0.05
power = 0.90
effect_size = new_click_rate - baseline_click_rate

# Calculate required sample size per group
required_sample_size = smp.NormalIndPower().solve_power(
    effect_size=effect_size,
    power=power,
    alpha=alpha
)
print("Required sample size per group:", required_sample_size)
print("Total required sample size:", required_sample_size*2)

# Total sample size for both groups
total_sample_size = required_sample_size * 2
views_per_hour = 4000
# total hours needed to achieve the total sample size
total_hours_needed = total_sample_size / views_per_hour

print("Total hours needed to run the test:", total_hours_needed)

Required sample size per group: 840593.5527752729
Total required sample size: 1681187.1055505457
Total hours needed to run the test: 420.29677638763644


In [41]:
# Question 2
data = pd.read_csv("upworthy_questions.csv")
# data.head()

total_impressions0 = data['impressions0'].sum()
total_impressions1 = data['impressions1'].sum()
total_clicks0 = data['clicks0'].sum()
total_clicks1 = data['clicks1'].sum()

# Calculate click-through rates
ctr0 = total_clicks0 / total_impressions0
ctr1 = total_clicks1 / total_impressions1

# difference in means
difference_in_means = ctr1 - ctr0

# Create a summary DataFrame
summary_df = pd.DataFrame({
    'Arm': ['Control', 'Treatment'],
    'Total Impressions': [total_impressions0, total_impressions1],
    'Total Clicks': [total_clicks0, total_clicks1],
    'CTR': [ctr0, ctr1]
})

print(summary_df)
print("\n Difference in means (CTR):", difference_in_means)

         Arm  Total Impressions  Total Clicks       CTR
0    Control           24870155        302207  0.012151
1  Treatment           24867195        273798  0.011010

 Difference in means (CTR): -0.0011409824019145802


In [42]:
# Calculate stds
std0 = np.sqrt(ctr0 * (1 - ctr0) / total_impressions0)
std1 = np.sqrt(ctr1 * (1 - ctr1) / total_impressions1)

# Standard error of the difference
se_diff = np.sqrt(std0**2 + std1**2)

# 95% confidence interval
ci_lower = difference_in_means - 1.96 * se_diff
ci_upper = difference_in_means + 1.96 * se_diff

print("95% CI for difference in means: (", ci_lower, ",", ci_upper, ")")

95% CI for difference in means: ( -0.0012004499012693817 , -0.0010815149025597787 )


In [43]:
# Question 3A

# Calculate CTR for each arm
data['ctr0'] = data['clicks0'] / data['impressions0']
data['ctr1'] = data['clicks1'] / data['impressions1']

# ATE for each experiment
data['ate'] = data['ctr1'] - data['ctr0']

average_ate = data['ate'].mean()

print("Average ATE over all experiments:", average_ate)

Average ATE over all experiments: -0.0012118772185663103


In [44]:
# Question 3B

# standard error for each arm
data['se0'] = np.sqrt((data['ctr0'] * (1 - data['ctr0'])) / data['impressions0'])
data['se1'] = np.sqrt((data['ctr1'] * (1 - data['ctr1'])) / data['impressions1'])

# standard error of the ATE
data['se_ate'] = np.sqrt(data['se0']**2 + data['se1']**2)

# 95% confidence interval for each ATE
data['ate_lb'] = data['ate'] - 1.96 * data['se_ate']
data['ate_ub'] = data['ate'] + 1.96 * data['se_ate']

# How many ATEs are statistically significant
significant_ates = []
for i in range(len(data)):
    if data['ate_lb'][i] > 0 or data['ate_ub'][i] < 0:
        significant_ates.append(data['ate'][i])

# Count of significant ATEs
significant_ates_count = len(significant_ates)
print("Number of statistically significant ATEs:", significant_ates_count)

Number of statistically significant ATEs: 2315


In [45]:
# Question 3C
mean_significant_ate = sum(significant_ates) / len(significant_ates)

print("Mean ATE among statistically significant experiments:", mean_significant_ate)

Mean ATE among statistically significant experiments: -0.003049721899747998
