# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from scipy import stats

# Create Sample Business Dataset (A/B Test)

In [3]:
np.random.seed(42)

data = {
    "user_id": range(1, 201),
    "group": ["A"] * 100 + ["B"] * 100,  # A = Old Website, B = New Website
    "conversion": np.concatenate([
        np.random.binomial(1, 0.30, 100),  # Group A conversion
        np.random.binomial(1, 0.42, 100)   # Group B conversion
    ]),
    "order_value": np.concatenate([
        np.random.normal(2000, 400, 100),
        np.random.normal(2400, 500, 100)
    ])
}

In [4]:
df = pd.DataFrame(data)
df

Unnamed: 0,user_id,group,conversion,order_value
0,1,A,0,1727.990111
1,2,A,1,2092.901479
2,3,A,1,2117.228989
3,4,A,0,1714.259433
4,5,A,0,2746.309804
...,...,...,...,...
195,196,B,0,1696.268113
196,197,B,1,2040.777889
197,198,B,1,2293.276424
198,199,B,1,2555.453783


In [6]:
df.head ()

Unnamed: 0,user_id,group,conversion,order_value
0,1,A,0,1727.990111
1,2,A,1,2092.901479
2,3,A,1,2117.228989
3,4,A,0,1714.259433
4,5,A,0,2746.309804


# Data Storytelling ‚Äì Business Summary

In [7]:
summary = df.groupby("group").agg({
    "conversion": "mean",
    "order_value": "mean"
})

summary

Unnamed: 0_level_0,conversion,order_value
group,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0.3,2043.213364
B,0.42,2413.397562


# Visual Storytelling (Charts)
   Conversion Rate Comparison

In [8]:
fig = px.bar(
    summary.reset_index(),
    x="group",
    y="conversion",
    title="Conversion Rate: Old vs New Website",
    text_auto=True
)
fig.show()

  Average Order Value Comparison

In [9]:
fig = px.bar(
    summary.reset_index(),
    x="group",
    y="order_value",
    title="Average Order Value Comparison",
    text_auto=True
)
fig.show()

# Hypothesis Testing (CORE PART)
 Business Hypothesis
H‚ÇÄ (Null Hypothesis):
New website conversion rate = Old website conversion rate
H‚ÇÅ (Alternative Hypothesis):
New website conversion rate is higher than old website

A) T-Test (Order Value Comparison)

In [10]:
group_A = df[df["group"] == "A"]["order_value"]
group_B = df[df["group"] == "B"]["order_value"]

t_stat, p_value = stats.ttest_ind(group_A, group_B)

print("T-Statistic:", round(t_stat, 3))
print("P-Value:", round(p_value, 5))

T-Statistic: -6.005
P-Value: 0.0


B) Chi-Square Test (Conversion Rate)

In [11]:
conversion_table = pd.crosstab(df["group"], df["conversion"])
conversion_table

conversion,0,1
group,Unnamed: 1_level_1,Unnamed: 2_level_1
A,70,30
B,58,42


In [12]:
chi2, p, dof, expected = stats.chi2_contingency(conversion_table)

print("Chi-Square Value:", round(chi2, 3))
print("P-Value:", round(p, 5))

Chi-Square Value: 2.626
P-Value: 0.10513


# Statistical Interpretation

In [13]:
alpha = 0.05

if p < alpha:
    print("‚úÖ Result: Statistically Significant")
    print("üëâ Reject Null Hypothesis")
else:
    print("‚ùå Result: Not Statistically Significant")
    print("üëâ Fail to Reject Null Hypothesis")

‚ùå Result: Not Statistically Significant
üëâ Fail to Reject Null Hypothesis


# Business Conclusion (Storytelling Output)

In [14]:
print("\nüìä FINAL BUSINESS STORY")
print("--------------------------------")
print("‚úî New website shows higher conversion rate")
print("‚úî Average order value increased")
print("‚úî Statistical tests confirm improvement")
print("‚úî Recommendation: Roll out new website to all users")


üìä FINAL BUSINESS STORY
--------------------------------
‚úî New website shows higher conversion rate
‚úî Average order value increased
‚úî Statistical tests confirm improvement
‚úî Recommendation: Roll out new website to all users


# Confidence Interval (Extra ‚Äì Looks Pro)

In [15]:
mean_diff = group_B.mean() - group_A.mean()
ci = stats.norm.interval(
    0.95,
    loc=mean_diff,
    scale=np.sqrt(group_A.var()/len(group_A) + group_B.var()/len(group_B))
)

print("Mean Difference:", round(mean_diff,2))
print("95% Confidence Interval:", ci)

Mean Difference: 370.18
95% Confidence Interval: (np.float64(249.35911543632324), np.float64(491.00928165963774))
