In [16]:
import pandas as pd
import numpy as np

# 0) 데이터 준비

hackle = pd.read_csv("clean_vote_ver2/processed_hackle_merge.csv")


  hackle = pd.read_csv("clean_vote_ver2/processed_hackle_merge.csv")


In [17]:
# 1) datetime 정리 (ns 제거 -> 초 단위)

hackle["event_datetime"] = pd.to_datetime(hackle["event_datetime"], errors="coerce").dt.floor("s")

# 필수값 없는 행 제거 (퍼널 계산용)
base = hackle.dropna(subset=["user_id", "event_key", "event_datetime"]).copy()

In [18]:
# 2) 퍼널 정의

# 참여
ENGAGEMENT_KEYS = ["complete_question"]

# 수익
REVENUE_KEYS = ["complete_purchase"]

In [19]:
# 3) 단계별 유저

# 참여
# 유저별 참여 최초 시각
eng_first = (base[base["event_key"].isin(ENGAGEMENT_KEYS)].groupby("user_id")["event_datetime"].min().rename("eng_first"))

# 참여 유저가 없으면 종료
if eng_first.empty:
    print("참여 유저 0명")
else:
    # 유저별 전체 로그 최대 시각
    max_time = (base.groupby("user_id")["event_datetime"].max().rename("max_time"))

    # 참여 이후 로그 존재 = 리텐션
    # 참여 이후 이벤트가 1개라도 있으면 True
    tmp = pd.concat([eng_first, max_time], axis=1)
    tmp["is_retained"] = tmp["max_time"] > tmp["eng_first"]  

    # 수익: 유저별 구매 최초 시각
    rev_first = (base[base["event_key"].isin(REVENUE_KEYS)].groupby("user_id")["event_datetime"].min().rename("rev_first"))

    tmp = tmp.join(rev_first, how="left")
    tmp["is_revenue"] = tmp["rev_first"].notna()

    # 퍼널 카운트 (참여 -> 리텐션 -> 수익)
    engaged_cnt = tmp["eng_first"].notna().sum()
    retained_cnt = tmp["is_retained"].sum()
    revenue_cnt = (tmp["is_retained"] & tmp["is_revenue"]).sum()

    # 전환율
    ret_rate = retained_cnt / engaged_cnt if engaged_cnt else np.nan
    rev_rate_from_ret = revenue_cnt / retained_cnt if retained_cnt else np.nan
    rev_rate_from_eng = (tmp["is_revenue"].sum()) / engaged_cnt if engaged_cnt else np.nan

    print("---Funnel---")
    print(f"참여 유저 수: {engaged_cnt:,}")
    print(f"참여 이후 로그 존재 유저 수: {retained_cnt:,} | 전환율: {ret_rate:.2%}")
    print(f"구매 완료 유저 수 (리텐션된 유저 중): {revenue_cnt:,} | 전환율: {rev_rate_from_ret:.2%}")
    print(f"구매 완료 유저 수 (참여 유저 중 전체): {tmp['is_revenue'].sum():,} | 전환율: {rev_rate_from_eng:.2%}")


---Funnel---
참여 유저 수: 48,981
참여 이후 로그 존재 유저 수: 46,030 | 전환율: 93.98%
구매 완료 유저 수 (리텐션된 유저 중): 934 | 전환율: 2.03%
구매 완료 유저 수 (참여 유저 중 전체): 1,617 | 전환율: 3.30%


In [None]:
# 퍼널 유저 목록 뽑기

funnel_users = tmp.loc[tmp["is_retained"] & tmp["is_revenue"]].copy()
funnel_users = funnel_users.sort_values("rev_first").reset_index()  # user_id가 index라서
funnel_users.rename(columns={"index": "user_id"}, inplace=True)

print("리텐션 & 구매 유저")
funnel_users.head(20)


리텐션 & 구매 유저


Unnamed: 0,user_id,eng_first,max_time,is_retained,rev_first,is_revenue
0,1578432.0,2023-07-18 07:29:03,2023-08-09 11:50:42,True,2023-07-18 00:17:32,True
1,1222538.0,2023-07-18 06:20:49,2023-08-10 10:24:36,True,2023-07-18 06:19:18,True
2,1261294.0,2023-07-18 09:08:42,2023-08-10 17:46:19,True,2023-07-18 09:07:49,True
3,1559563.0,2023-07-18 11:19:36,2023-08-06 20:24:26,True,2023-07-18 16:32:57,True
4,1578916.0,2023-07-18 13:55:48,2023-08-07 05:47:15,True,2023-07-18 17:07:41,True
5,1403685.0,2023-07-18 15:59:18,2023-08-09 22:01:22,True,2023-07-18 17:08:02,True
6,1003836.0,2023-07-18 18:36:23,2023-08-07 16:56:32,True,2023-07-18 18:37:11,True
7,1015246.0,2023-07-19 14:38:16,2023-08-07 06:36:06,True,2023-07-18 18:38:40,True
8,1015058.0,2023-07-18 18:39:34,2023-08-01 15:07:28,True,2023-07-18 18:45:27,True
9,1024621.0,2023-07-18 18:48:01,2023-07-30 09:33:32,True,2023-07-18 18:48:39,True


In [None]:
# 5) 리텐션 정의가 맞는지 검증용: 특정 유저의 참여 시점 이후 로그 10개 보기

# uid = funnel_users["user_id"].iloc[0]
# t0 = tmp.loc[uid, "eng_first"]
# check = base[(base["user_id"] == uid) & (base["event_datetime"] >= t0)].sort_values("event_datetime")
# check[["event_datetime", "event_key", "heart_balance", "friend_count", "votes_count"]].head(10)

Unnamed: 0,event_datetime,event_key,heart_balance,friend_count,votes_count
2593922,2023-07-18 07:29:03,complete_question,300.0,0.0,0.0
5589074,2023-07-18 07:29:03,complete_question,300.0,0.0,0.0
2594257,2023-07-18 07:29:14,click_appbar_chat_rooms,300.0,0.0,0.0
5589409,2023-07-18 07:29:14,click_appbar_chat_rooms,300.0,0.0,0.0
5589216,2023-07-18 07:29:19,click_question_open,300.0,0.0,0.0
2594064,2023-07-18 07:29:19,click_question_open,300.0,0.0,0.0
2594319,2023-07-18 07:29:22,click_appbar_chat_rooms,300.0,0.0,0.0
5589471,2023-07-18 07:29:22,click_appbar_chat_rooms,300.0,0.0,0.0
2593919,2023-07-18 07:29:27,click_question_open,300.0,0.0,0.0
5589071,2023-07-18 07:29:27,click_question_open,300.0,0.0,0.0


In [None]:
# 모든 기록 해클로 확인