In [15]:
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm

Two Population Comparison: Proportions - Confidence Interval 

In [16]:
# Observed counts for two independent samples.
# A "success" is, e.g., a person who bought a product.
n1, x1 = 100, 35   # Population 1: sample size, number of successes
n2, x2 = 120, 45   # Population 2: sample size, number of successes

# Sample proportion of successes in each population
p1 = x1 / n1
p2 = x2 / n2

# 95% (alpha=0.05) normal-approximation interval for each proportion
conf_int1 = sm.stats.proportion_confint(count=x1, nobs=n1, alpha=0.05, method='normal')
conf_int2 = sm.stats.proportion_confint(count=x2, nobs=n2, alpha=0.05, method='normal')

# Raw (lower, upper) tuples, one per line
print(conf_int1, conf_int2, sep="\n")

# Two closed intervals intersect exactly when each one starts no later
# than the other one ends.
# NOTE: overlap of the two individual intervals is only a rough,
# conservative heuristic for "no significant difference"; a test or an
# interval on the difference p1 - p2 is the more direct comparison.
low1, high1 = conf_int1
low2, high2 = conf_int2
overlap = low2 <= high1 and low1 <= high2

# Labelled summary
print("Population 1 proportion:", p1)
print("Population 2 proportion:", p2)
print("Confidence interval for Population 1:", conf_int1)
print("Confidence interval for Population 2:", conf_int2)

print(
    "The confidence intervals overlap, suggesting no significant difference."
    if overlap
    else "The confidence intervals do not overlap, suggesting a significant difference."
)


(0.25651567608909426, 0.4434843239109057)
(0.2883810109781451, 0.4616189890218549)
Population 1 proportion: 0.35
Population 2 proportion: 0.375
Confidence interval for Population 1: (0.25651567608909426, 0.4434843239109057)
Confidence interval for Population 2: (0.2883810109781451, 0.4616189890218549)
The confidence intervals overlap, suggesting no significant difference.


Two Population Comparison: Proportions - Hypothesis Test 

In [17]:
# Observed counts for two independent samples.
# A "success" is, e.g., a person who bought a product.
n1, x1 = 100, 35   # Population 1: sample size, number of successes
n2, x2 = 120, 45   # Population 2: sample size, number of successes

# Significance level used for the decision below
alpha = 0.05

# Two-sample z-test for proportions.
# Two-sided alternative: the null hypothesis is p1 == p2 and the
# alternative is p1 != p2, i.e. we only ask whether the proportions
# differ, without specifying a direction (greater or less than).
z_stat, p_value = sm.stats.proportions_ztest(
    count=[x1, x2],
    nobs=[n1, n2],
    alternative='two-sided',
)

# Report the test statistic and its p-value
print("Z-statistic:", z_stat)
print("P-value:", p_value)

# Reject H0 only when the p-value falls strictly below alpha
print(
    "Reject the null hypothesis: There is a significant difference in proportions."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant difference in proportions."
)

Z-statistic: -0.38382473678936957
P-value: 0.701108352470263
Fail to reject the null hypothesis: There is no significant difference in proportions.


Two Population Comparison: Means - Confidence Interval

In [18]:
# Observations for the two populations being compared
data1 = np.array([85, 90, 88, 92, 87])   # Population 1
data2 = np.array([78, 82, 80, 85, 79])   # Population 2

# Population 1: sample mean, standard error of the mean, and the 95%
# t-interval (n - 1 degrees of freedom) centred on the mean
mean1 = data1.mean()
std_err1 = stats.sem(data1)
conf_int1 = stats.t.interval(0.95, df=data1.size - 1, loc=mean1, scale=std_err1)

# Population 2: same quantities
mean2 = data2.mean()
std_err2 = stats.sem(data2)
conf_int2 = stats.t.interval(0.95, df=data2.size - 1, loc=mean2, scale=std_err2)

# Two closed intervals intersect exactly when each one starts no later
# than the other one ends.
# NOTE: comparing the two individual intervals is only a rough heuristic;
# a two-sample test on the difference of means is the more direct comparison.
low1, high1 = conf_int1
low2, high2 = conf_int2
overlap = low2 <= high1 and low1 <= high2

# Labelled summary, one population after the other
print("Mean of Population 1:", mean1)
print("Confidence interval for Population 1:", conf_int1)
print("Mean of Population 2:", mean2)
print("Confidence interval for Population 2:", conf_int2)

print(
    "The confidence intervals overlap, suggesting no significant difference."
    if overlap
    else "The confidence intervals do not overlap, suggesting a significant difference."
)

Mean of Population 1: 88.4
Confidence interval for Population 1: (85.04520861507335, 91.75479138492666)
Mean of Population 2: 80.8
Confidence interval for Population 2: (77.35452223484866, 84.24547776515134)
The confidence intervals do not overlap, suggesting a significant difference.


Two Population Comparison: Means - Hypothesis Test

In [19]:
import numpy as np
import scipy.stats as stats

# Observations for the two populations being compared
data1 = np.array([85, 90, 88, 92, 87])   # Population 1
data2 = np.array([78, 82, 80, 85, 79])   # Population 2

# Significance level used for the decision below
alpha = 0.05

# Independent two-sample t-test on the means, called with SciPy's
# default settings
test_result = stats.ttest_ind(data1, data2)
t_stat = test_result.statistic
p_value = test_result.pvalue

# Report the test statistic and its p-value
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Reject H0 only when the p-value falls strictly below alpha
print(
    "Reject the null hypothesis: There is a significant difference in means."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant difference in means."
)


T-statistic: 4.387862045841161
P-value: 0.0023241881225952348
Reject the null hypothesis: There is a significant difference in means.
