In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats



In [2]:
# Seed the global NumPy RNG so the simulated experiment is repeatable.
np.random.seed(42)

n_users = 1000

# Simulated conversion rates (10% for group A, 12% for group B).
conversion_rates = {"A": 0.10, "B": 0.12}

# Assign every user to group A or B with equal probability.
groups = np.random.choice(["A", "B"], size=n_users, p=[0.5, 0.5])

# One uniform draw per user, taken in user order; a user converts when the
# draw falls below the conversion rate of that user's group.
per_user_rate = np.array([conversion_rates[label] for label in groups])
conversions = np.random.rand(n_users) < per_user_rate

df = pd.DataFrame({"Group": groups, "Converted": conversions})

# Per-group totals: number of conversions, number of users, observed rate.
conversion_summary = (
    df.groupby("Group")["Converted"]
    .agg(["sum", "count", "mean"])
    .rename(
        columns={
            "sum": "Total Conversions",
            "count": "Total Users",
            "mean": "Conversion Rate",
        }
    )
)
conversion_summary


Unnamed: 0_level_0,Total Conversions,Total Users,Conversion Rate
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,61,503,0.121272
B,57,497,0.114688


In [3]:
# Build the Group x Converted contingency table for the chi-square test.
contingency_table = pd.crosstab(index=df["Group"], columns=df["Converted"])

# Chi-square test of independence; only the statistic and the p-value are
# kept, the remaining returned values are not used.
chi2, p_value = stats.chi2_contingency(contingency_table)[:2]

p_value


0.8222351797486572

In [4]:

# Second scenario: 10% conversion for group A vs 15% for group B, reusing the
# existing `groups` assignment. The global NumPy RNG is not reseeded here, so
# the draws continue from whatever state earlier cells left it in.
conversion_rates_new = {"A": 0.10, "B": 0.15}

# One uniform draw per user, in user order, compared with the group's rate.
per_user_rate_new = np.array([conversion_rates_new[label] for label in groups])
conversions_new = np.random.rand(len(groups)) < per_user_rate_new

df_new = pd.DataFrame({"Group": groups, "Converted": conversions_new})

# Per-group totals: number of conversions, number of users, observed rate.
conversion_summary_new = (
    df_new.groupby("Group")["Converted"]
    .agg(["sum", "count", "mean"])
    .rename(
        columns={
            "sum": "Total Conversions",
            "count": "Total Users",
            "mean": "Conversion Rate",
        }
    )
)

# Chi-square test of independence on the Group x Converted table.
contingency_table_new = pd.crosstab(index=df_new["Group"], columns=df_new["Converted"])
chi2_new, p_value_new = stats.chi2_contingency(contingency_table_new)[:2]

conversion_summary_new, p_value_new


(       Total Conversions  Total Users  Conversion Rate
 Group                                                 
 A                     57          503         0.113320
 B                     72          497         0.144869,
 0.16337653535024957)

In [5]:

# Third scenario: 10% conversion for group A vs 20% for group B, reusing the
# existing `groups` assignment. The global NumPy RNG is not reseeded here, so
# the draws continue from whatever state earlier cells left it in.
conversion_rates_strong = {"A": 0.10, "B": 0.20}

# One uniform draw per user, in user order, compared with the group's rate.
per_user_rate_strong = np.array([conversion_rates_strong[label] for label in groups])
conversions_strong = np.random.rand(len(groups)) < per_user_rate_strong

df_strong = pd.DataFrame({"Group": groups, "Converted": conversions_strong})

# Per-group totals: number of conversions, number of users, observed rate.
conversion_summary_strong = (
    df_strong.groupby("Group")["Converted"]
    .agg(["sum", "count", "mean"])
    .rename(
        columns={
            "sum": "Total Conversions",
            "count": "Total Users",
            "mean": "Conversion Rate",
        }
    )
)

# Chi-square test of independence on the Group x Converted table.
contingency_table_strong = pd.crosstab(
    index=df_strong["Group"], columns=df_strong["Converted"]
)
chi2_strong, p_value_strong = stats.chi2_contingency(contingency_table_strong)[:2]

conversion_summary_strong, p_value_strong


(       Total Conversions  Total Users  Conversion Rate
 Group                                                 
 A                     53          503         0.105368
 B                     97          497         0.195171,
 0.000101107215989342)

In [6]:

# Reseed the global NumPy RNG so this cell is repeatable on its own.
np.random.seed(42)
n_users = 1000

# Parameters of the simulated "time spent" distributions; both groups share
# the same standard deviation. The summary below labels the values as seconds.
mean_time_A = 120
mean_time_B = 135
std_dev = 30

# Half of the users go to each group; group A is drawn first, then group B.
group_size = n_users // 2
time_spent_A = np.random.normal(loc=mean_time_A, scale=std_dev, size=group_size)
time_spent_B = np.random.normal(loc=mean_time_B, scale=std_dev, size=group_size)

# Long-format frame: one row per user with the group label and the time spent.
group_labels = ["A"] * group_size + ["B"] * group_size
all_times = np.concatenate([time_spent_A, time_spent_B])
df_time = pd.DataFrame({"Group": group_labels, "Time Spent": all_times})

# Student's t-test on the two independent samples.
ttest_result = stats.ttest_ind(time_spent_A, time_spent_B)
t_stat = ttest_result[0]
p_value_ttest = ttest_result[1]

# Per-group mean, standard deviation and sample size.
time_summary = (
    df_time.groupby("Group")["Time Spent"]
    .agg(["mean", "std", "count"])
    .rename(
        columns={
            "mean": "Mean Time (s)",
            "std": "Std Dev",
            "count": "Sample Size",
        }
    )
)

time_summary, p_value_ttest


(       Mean Time (s)    Std Dev  Sample Size
 Group                                       
 A         120.205140  29.437597          500
 B         135.954784  29.339916          500,
 8.460275618434352e-17)