In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Folder that receives every exported chart (these images are shown on GitHub).
# It is created up front so the later savefig calls never hit a missing directory.
FIG_DIR = "../reports/figures"
os.makedirs(FIG_DIR, exist_ok=True)

# Let pandas show up to 100 columns and use 120 characters of width when
# rendering frames in this notebook.
pd.options.display.max_columns = 100
pd.options.display.width = 120

In [None]:
# Read the raw events file and convert the event_time column to pandas
# datetimes in the same expression, so `df` is bound exactly once.
df = pd.read_csv("../data/raw/events.csv").assign(
    event_time=lambda events: pd.to_datetime(events["event_time"])
)

# Preview the first rows.
df.head()

In [None]:
# Sanity check: (rows, columns) of the events frame, plus the number of rows per event_name.
(df.loc[:, :].shape, df.loc[:, "event_name"].value_counts())

In [None]:
# Funnel stages in the order they are reported; the tables and charts below
# reuse this list to order their rows / columns.
STAGES = [
    "visit",
    "sign_up",
    "kyc_start",
    "kyc_approved",
    "first_deposit",
    "first_trade",
    "retained_7d",
]

# One row per stage:
#   users                   - distinct user_id values with that event_name
#   overall_conv_from_visit - users relative to the "visit" row
#   step_conv_from_prev     - users relative to the row directly above
#                             (NaN for the first row, which has no predecessor)
#   step_dropoff            - complement of step_conv_from_prev
funnel = (
    df.loc[df["event_name"].isin(STAGES)]
    .groupby("event_name")["user_id"]
    .nunique()
    .reindex(STAGES)
    .to_frame("users")
    .assign(
        overall_conv_from_visit=lambda f: f["users"] / f.loc["visit", "users"],
        step_conv_from_prev=lambda f: f["users"] / f["users"].shift(1),
        step_dropoff=lambda f: 1 - f["step_conv_from_prev"],
    )
)

funnel


In [None]:
# Bar chart of unique users per funnel stage, exported as a PNG.
#
# FIG_DIR is defined (and its directory created) once in the setup cell at the
# top of the notebook; it is deliberately not redefined here so the output
# location has a single source of truth.
fig, ax = plt.subplots()
ax.bar(funnel.index, funnel["users"])

# Stage names are long; tilt them and right-align so each label ends under its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

ax.set_title("Forex App Funnel — Unique Users by Stage")
ax.set_ylabel("Users")
fig.tight_layout()

out_path = f"{FIG_DIR}/01_funnel_users_by_stage.png"
fig.savefig(out_path, dpi=200)
plt.show()

# Show where the image was written.
out_path


In [None]:
# Line chart of step conversion, one point per stage transition, exported as a PNG.
#
# funnel["step_conv_from_prev"] holds, for each stage, users at that stage
# divided by users at the previous stage. The first stage has no previous
# stage (its value is NaN), so it is left out, and every remaining point is
# labelled "previous → stage" so the axis states which transition the rate
# belongs to.
step_conv = funnel["step_conv_from_prev"].iloc[1:]
transition_labels = [
    f"{prev_stage} → {stage}"
    for prev_stage, stage in zip(funnel.index[:-1], funnel.index[1:])
]

fig, ax = plt.subplots()
ax.plot(transition_labels, step_conv.to_numpy(), marker="o")

# Transition labels are long; tilt them and right-align under their points.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

ax.set_title("Step Conversion Rate (Stage → Next Stage)")
ax.set_ylabel("Conversion rate")
ax.set_ylim(0, 1.05)
fig.tight_layout()

out_path = f"{FIG_DIR}/02_step_conversion_rates.png"
fig.savefig(out_path, dpi=200)
plt.show()

# Show where the image was written.
out_path


In [None]:
def _safe_rate(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
    """Divide two aligned count columns, yielding NaN where the denominator is 0.

    Plain division of the integer counts would produce ``inf`` for ``x / 0``,
    which is not a meaningful conversion rate. Masking zero denominators
    makes those cells NaN ("rate undefined") instead.
    """
    return numerator / denominator.where(denominator != 0)


# Unique users per (channel, stage), with stages as columns in STAGES order.
# Channel/stage combinations with no events are filled with 0 so the counts
# stay integers; the conversion-rate columns are then derived from the counts.
channel_tbl = (
    df[df["event_name"].isin(STAGES)]
    .pivot_table(index="channel", columns="event_name", values="user_id", aggfunc="nunique")
    .reindex(columns=STAGES)
    .fillna(0)
    .astype(int)
    .assign(
        visit_to_signup=lambda t: _safe_rate(t["sign_up"], t["visit"]),
        signup_to_kyc_approved=lambda t: _safe_rate(t["kyc_approved"], t["sign_up"]),
        kyc_to_deposit=lambda t: _safe_rate(t["first_deposit"], t["kyc_approved"]),
        deposit_to_trade=lambda t: _safe_rate(t["first_trade"], t["first_deposit"]),
        trade_to_retained_7d=lambda t: _safe_rate(t["retained_7d"], t["first_trade"]),
    )
)

# Display channels ordered by number of first-trade users, largest first.
channel_tbl.sort_values("first_trade", ascending=False)

In [None]:
# Grouped bar chart per channel: first-trade users next to 7-day-retained
# users, channels ordered by first-trade count (largest first). Saved as a PNG.
plot_df = channel_tbl.loc[:, ["first_trade", "retained_7d"]].sort_values(
    "first_trade", ascending=False
)

fig, ax = plt.subplots()

# One slot per channel; the two bars of a pair sit half a bar-width to either
# side of the slot centre.
x = np.arange(len(plot_df))
width = 0.4
ax.bar(x - width / 2, plot_df["first_trade"], width, label="first_trade users")
ax.bar(x + width / 2, plot_df["retained_7d"], width, label="retained_7d users")

ax.set_xticks(x)
ax.set_xticklabels(plot_df.index)
ax.set_title("Channel Performance — First Trade vs 7-day Retention (Users)")
ax.set_ylabel("Users")
ax.legend()
fig.tight_layout()

out_path = f"{FIG_DIR}/03_channel_trade_vs_retention.png"
fig.savefig(out_path, dpi=200)
plt.show()

# Show where the image was written.
out_path