In [None]:
!pip install pandas numpy scipy matplotlib seaborn statsmodels

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

print("Libraries loaded!")

In [None]:
# Simulate an A/B test: 10,000 users per arm, each with a 0/1 "converted" flag.
# Seeding NumPy's global generator fixes the two binomial draws below; the shuffle
# is not given its own random_state, so it draws from the same global state.
np.random.seed(42)

# Draw order matters for reproducibility: control first, then treatment.
control_flags = np.random.binomial(1, 0.075, 10000)    # 7.5% conversion probability
treatment_flags = np.random.binomial(1, 0.090, 10000)  # 9.0% conversion probability

data = pd.DataFrame({
    'user_id': range(20000),
    'group': [arm for arm in ('control', 'treatment') for _ in range(10000)],
    'converted': np.concatenate((control_flags, treatment_flags)),
})

# Shuffle every row (frac=1) and renumber the index from 0 so the arms are interleaved.
ab_data = data.sample(frac=1).reset_index(drop=True)
print("Data ban gaya!")

In [None]:
# Preview the first few rows of the shuffled dataset; as the cell's last expression
# the result is rendered as the cell output.
ab_data.head()

In [None]:
# Per-group totals: 'count' is the number of users in the arm, and 'sum' of the
# 0/1 flags is the number of conversions.
summary = (
    ab_data.groupby('group')['converted']
    .agg(['count', 'sum'])
    .rename(columns={'count': 'Total Users', 'sum': 'Conversions'})
)

# Conversion rate as a fraction: conversions divided by users in the same arm.
summary['Conversion Rate'] = summary['Conversions'].div(summary['Total Users'])

print(summary)

In [None]:
# Pull the scalar counts for each arm out of the summary table so they can be
# passed to the statistical tests in later cells.
n_control = summary.at['control', 'Total Users']
conv_control = summary.at['control', 'Conversions']

n_treatment = summary.at['treatment', 'Total Users']
conv_treatment = summary.at['treatment', 'Conversions']

print(f"Control Conv: {conv_control}, Treatment Conv: {conv_treatment}")

In [None]:
from statsmodels.stats.proportion import proportions_ztest

# One-sided two-proportion z-test. Treatment is listed first, so with
# alternative='larger' the alternative hypothesis is "treatment rate > control rate".
successes = [conv_treatment, conv_control]
trials = [n_treatment, n_control]

z_score, p_value = proportions_ztest(successes, trials, alternative='larger')

print(f"Z-Score: {z_score:.2f}")
print(f"P-Value: {p_value:.5f}")

In [None]:
# Report the outcome at the 5% significance level: a p-value below 0.05 is treated
# as evidence that the new design converts better than the old one.
verdict = (
    "✅ SUCCESS: Naya design purane se better hai!"
    if p_value < 0.05
    else "❌ FAIL: Naye design mein koi khaas farq nahi hai."
)
print(verdict)

In [None]:
# Bar chart of the mean of the 0/1 'converted' flag (i.e. the conversion rate) per arm,
# with 95% bootstrap confidence intervals as error bars.
sns.barplot(
    data=ab_data,
    x='group',
    y='converted',
    # Pin the bar order: the rows are shuffled, so without this the left-most bar is
    # whichever arm happens to appear first in the data.
    order=['control', 'treatment'],
    errorbar=('ci', 95),
    # Seed the bootstrap so the error bars are identical on every Restart & Run All.
    seed=42,
)
plt.title('Conversion Rate Comparison')
plt.ylabel('Conversion Rate')
plt.show()

In [None]:
from statsmodels.stats.proportion import proportion_confint

# 95% confidence interval (alpha=0.05) for each arm's conversion rate, using the
# library's default interval method. Each call returns a (lower, upper) pair.
lower_con, upper_con = proportion_confint(
    count=conv_control, nobs=n_control, alpha=0.05
)
lower_treat, upper_treat = proportion_confint(
    count=conv_treatment, nobs=n_treatment, alpha=0.05
)

print(f"Control Group Range:   {lower_con:.2%} to {upper_con:.2%}")
print(f"Treatment Group Range: {lower_treat:.2%} to {upper_treat:.2%}")

In [None]:
# TTestIndPower is no longer used here; it stays imported only in case other cells rely on it.
from statsmodels.stats.power import NormalIndPower, TTestIndPower
from statsmodels.stats.proportion import proportion_effectsize

# Sample-size check for the one-sided two-proportion z-test used in this notebook.
#
# The effect size must describe the difference we actually want to detect. A generic
# Cohen's d of 0.2 fed to a t-test power calculator answers a different question and
# understates the requirement by roughly an order of magnitude for a 7.5% -> 9.0% lift.
# Planning assumptions below are the same rates used to simulate the data.
BASELINE_RATE = 0.075   # expected control conversion rate
TARGET_RATE = 0.090     # smallest treatment conversion rate we want to detect

# Cohen's h for two proportions; positive because TARGET_RATE > BASELINE_RATE,
# which is what the 'larger' alternative below requires.
effect_size = proportion_effectsize(TARGET_RATE, BASELINE_RATE)

analysis = NormalIndPower()
required_n = analysis.solve_power(
    effect_size=effect_size,
    power=0.8,
    alpha=0.05,
    ratio=1.0,               # equal-sized arms
    alternative='larger',    # matches the one-sided z-test (treatment > control)
)

# Round up: a fractional user cannot be recruited, and truncating would under-power the test.
required_users = int(np.ceil(required_n))
# The smaller arm is the binding constraint when the arms are not the same size.
available_users = min(n_control, n_treatment)

print(f"Kam se kam {required_users} users chahiye the har group mein.")
# Only claim the data is sufficient when it actually meets the requirement.
if available_users >= required_users:
    print(f"Humare paas {available_users} users the -> Data kaafi hai! ✅")
else:
    print(f"Humare paas {available_users} users the -> Data kaafi nahi hai! ❌")