# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({
    'font.size': 14,           
    'axes.titlesize': 16,      
    'axes.labelsize': 14,      
    'legend.fontsize': 12,    
    'xtick.labelsize': 12,   
    'ytick.labelsize': 12   
})
import seaborn as sns
from statsmodels.stats.proportion import proportions_ztest
import os


import os
# Folder that contains the five CSV inputs read below.
data_path = "/Users/dariyabaigereyeva/Desktop/product masters/venv/"


def _read_input(file_name):
    """Read one CSV file located directly under ``data_path``."""
    return pd.read_csv(os.path.join(data_path, file_name))


# One DataFrame per CSV; the variable names mirror the file names.
users_all_before_ab = _read_input("users_all_before_ab.csv")
groupA_startAB = _read_input("groupA_startAB.csv")
groupB_startAB = _read_input("groupB_startAB.csv")
groupA_finishAB = _read_input("groupA_finishAB.csv")
groupB_finishAB = _read_input("groupB_finishAB.csv")



# Colour per group label, keyed by name so the mapping does not depend on
# the order in which the groups appear in the data.
_GROUP_PALETTE = {"Group A": "#FFA07A", "Group B": "#1E90FF"}

# (column to plot, axis/title label, file-name stem) for each histogram.
_DISTRIBUTION_METRICS = (
    ("num_orders", "Number of Orders", "orders"),
    ("total_spent", "Total Spent", "total_spent"),
    ("unique_items", "Unique Items", "unique_items"),
)


def _plot_group_histogram(data, column, title, xlabel, filename):
    """Draw, save and show an overlaid per-group histogram of one column.

    Args:
        data: DataFrame holding ``column`` and a "group" column whose values
            are the keys of ``_GROUP_PALETTE``.
        column: Name of the column to plot on the x axis.
        title: Figure title.
        xlabel: Label for the x axis.
        filename: Path of the PNG written with ``plt.savefig`` (dpi=300).
    """
    plt.figure(figsize=(10, 6))
    sns.histplot(
        data=data,
        x=column,
        hue="group",
        stat="count",
        common_norm=False,
        bins=30,
        element="step",
        fill=True,
        palette=_GROUP_PALETTE,
        kde=True,
    )
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel("User Count")
    plt.tight_layout()
    plt.savefig(filename, dpi=300)
    plt.show()


# --- Distributions at the start of the test (slide 3) ---
groupA_startAB["group"] = "Group A"
groupB_startAB["group"] = "Group B"
# ignore_index=True gives the stacked frame a unique index; with the default,
# both inputs keep their own labels and the duplicates can make seaborn fail
# with "cannot reindex on an axis with duplicate labels" on some versions.
combined_pre = pd.concat([groupA_startAB, groupB_startAB], ignore_index=True)

for _column, _label, _stem in _DISTRIBUTION_METRICS:
    _plot_group_histogram(
        combined_pre,
        _column,
        f"{_label} Before Test",
        _label,
        f"slide3_{_stem}_distribution.png",
    )


# --- Distributions at the end of the test (slide 4) ---
groupA_finishAB["group"] = "Group A"
groupB_finishAB["group"] = "Group B"
combined_post = pd.concat([groupA_finishAB, groupB_finishAB], ignore_index=True)

for _column, _label, _stem in _DISTRIBUTION_METRICS:
    _plot_group_histogram(
        combined_post,
        _column,
        f"{_label} After Test",
        _label,
        f"slide4_{_stem}_distribution.png",
    )



# --- Conversion comparison between the groups (slide 5) ---
# Converted-user counts per group; presumably "conversion" is a 0/1 flag,
# so the column sum is the number of converted users -- TODO confirm.
conv_A = groupA_finishAB["conversion"].sum()
conv_B = groupB_finishAB["conversion"].sum()
n_A = len(groupA_finishAB)
n_B = len(groupB_finishAB)

# Two-sample z-test on the conversion proportions of groups A and B.
z_stat, p_val = proportions_ztest([conv_A, conv_B], [n_A, n_B])
conversion_rates = [conv_A / n_A * 100, conv_B / n_B * 100]

# Report the test outcome; without this the statistic is computed and dropped.
print(f"Two-proportion z-test: z = {z_stat:.3f}, p-value = {p_val:.4f}")

plt.figure(figsize=(6, 5))
# Same colour per group as the distribution histograms (A salmon, B blue),
# so a group keeps one colour across all slides.
bars = plt.bar(["Group A", "Group B"], conversion_rates, color=["#FFA07A", "#1E90FF"])
plt.title("P2P Conversion Rate by Group")
plt.ylabel("Conversion Rate (%)")
# Keep the original 0-30 range for small rates, but grow the axis when a bar
# (plus its text label) would otherwise be clipped.
plt.ylim(0, max(30, max(conversion_rates) * 1.15))

# Write each bar's value just above its top edge.
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width() / 2.0, yval + 0.5, f'{yval:.2f}%', ha='center', va='bottom')

plt.tight_layout()
# dpi=300 matches the resolution used for the slide 3 and slide 4 figures.
plt.savefig("slide5_conversion_rates.png", dpi=300)
plt.show()


# --- Converted vs. not-converted user portraits per group (slide 6) ---
# Short group labels for this chart; this overwrites any "group" value
# previously stored on the two end-of-test frames.
groupA_finishAB["group"] = "A"
groupB_finishAB["group"] = "B"
ab_combined = pd.concat([groupA_finishAB, groupB_finishAB])
# Boolean flag: True where the "conversion" column equals 1.
ab_combined["converted"] = ab_combined["conversion"] == 1

# Mean of each metric for every (group, converted) combination.
segment_summary = ab_combined.groupby(["group", "converted"]).agg({
    "num_orders": "mean",
    "total_spent": "mean",
    "unique_items": "mean"
}).reset_index()

# Human-readable legend labels instead of True/False.
segment_summary["converted"] = segment_summary["converted"].map({True: "Converted", False: "Not Converted"})

# One panel per metric, in a single row.
_portrait_panels = (
    ("num_orders", "Avg. Number of Orders"),
    ("total_spent", "Avg. Total Spent"),
    ("unique_items", "Avg. Unique Items"),
)

fig, axs = plt.subplots(1, 3, figsize=(18, 5))
for _ax, (_metric, _panel_title) in zip(axs, _portrait_panels):
    sns.barplot(data=segment_summary, x="group", y=_metric, hue="converted", ax=_ax)
    _ax.set_title(_panel_title)

plt.tight_layout()
# dpi=300 matches the resolution used for the slide 3 and slide 4 figures.
plt.savefig("slide6_client_portraits.png", dpi=300)
plt.show()

