In [1]:
pip install statsmodels

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
#EX1
# Calculating Required Sample Size

import numpy as np
from statsmodels.stats.power import TTestIndPower
import pandas as pd

prob_a = 0.20
prob_b = 0.23
effect_size = (prob_b - prob_a)
print("Effect Size is: ", effect_size)

power = 0.80

alpha = 0.05 # significance level

# In order to calculate the required sample size, we have to use the ttest since we do not know the sample size

stat_model = TTestIndPower()

sample = stat_model.solve_power(effect_size=effect_size, power=power, alpha=alpha, alternative='two-sided')

print("Required Sample Size is: ", (sample))




Effect Size is:  0.03
Required Sample Size is:  17442.872915844462


In [None]:
#EX2

# Understanding the Relationship Between Effect Size and Sample Size

effect_size = np.array([0.2, 0.4, 0.5])

for _ in effect_size:
    sample = stat_model.solve_power(effect_size=_, power=power, alpha=alpha, alternative='two-sided')
    print("Effect Size: ", _, "Sample Size: ", sample)


# as lons as the effect size increases, the sample size decreases since a large effect it is easier to detect changes




Effect Size:  0.2 Sample Size:  393.4056989990351
Effect Size:  0.4 Sample Size:  99.08032683981116
Effect Size:  0.5 Sample Size:  63.76561177540986


In [24]:
#EX3

effect_size = 0.2
power = np.array([0.7, 0.8, 0.9])
                 
for _ in power:
    sample = stat_model.solve_power(effect_size=effect_size, power=_, alpha=alpha, alternative='two-sided')
    print("Power: ", _, "Sample Size: ", sample)

# as long as the power increases, the sample size increases since the power is the probability of detecting an effect when it is present


Power:  0.7 Sample Size:  309.56333467437724
Power:  0.8 Sample Size:  393.4056989990351
Power:  0.9 Sample Size:  526.3332032147733


In [26]:
#EX4

from statsmodels.stats.proportion import proportions_ztest

# Data from the table
emails_sent_A = 500
purchases_A = 45
revenue_A = 5000

emails_sent_B = 500
purchases_B = 55
revenue_B = 6500

# Conversion rates
conversion_rate_A = purchases_A / emails_sent_A
conversion_rate_B = purchases_B / emails_sent_B

# 1. Type of test: This is a test of proportions since we are comparing conversion rates.

# 2. Null and alternative hypotheses:
# Null hypothesis (H0): Conversion rate of Version B <= Conversion rate of Version A
# Alternative hypothesis (H1): Conversion rate of Version B > Conversion rate of Version A

# 3. Perform a statistical test
# Using a two-proportion z-test
successes = np.array([purchases_B, purchases_A])
samples = np.array([emails_sent_B, emails_sent_A])
z_stat, p_value = proportions_ztest(successes, samples, alternative='larger')

# Print results
print(f"Conversion Rate for A: {conversion_rate_A:.2%}")
print(f"Conversion Rate for B: {conversion_rate_B:.2%}")
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# 4. Interpretation
if p_value < 0.05:
    print("Reject the null hypothesis. Version B performs significantly better than Version A.")
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to conclude that Version B performs better than Version A.")

#If p_value is 0.02, then we can reject the null hypothesis and stop the experiment.

Conversion Rate for A: 9.00%
Conversion Rate for B: 11.00%
Z-statistic: 1.0541
P-value: 0.1459
Fail to reject the null hypothesis. There is not enough evidence to conclude that Version B performs better than Version A.
