In [2]:
# pandas is imported here because this is the first cell that uses `pd`; without it
# the notebook raises NameError when run top to bottom on a fresh kernel.
import pandas as pd

# Load the event log that the cells below read and extend with derived columns.
df = pd.read_csv('data/merged_data_brand_dropna.csv')

In [4]:
import pandas as pd

# 1. Add a calendar-date column derived from the event timestamp
df['event_date'] = df['event_time_moscow'].pipe(pd.to_datetime).dt.date

# 2. Funnel stages, in funnel order
funnel_steps = [
    'view',
    'cart',
    'purchase',
]

In [6]:
# 1. Build a 0/1 indicator per (date, user, event_type).
#    `flag` is the constant that the pivot aggregates.
df['flag'] = 1

daily_funnel = (
    df.drop_duplicates(subset=['event_date', 'user_id', 'event_type'])  # one row per combination
      .pivot_table(
          values='flag',
          index=['event_date', 'user_id'],
          columns='event_type',
          aggfunc='max',   # 1 where the event type occurred, NaN otherwise
          fill_value=0,    # NaN -> 0
      )
      .reset_index()
      # 2. Keep only the key columns plus the funnel stages, in a fixed order
      .loc[:, ['event_date', 'user_id', *funnel_steps]]
)

In [8]:
# Show the rows whose purchase indicator is 1
daily_funnel.loc[daily_funnel['purchase'].eq(1)]

event_type,event_date,user_id,view,cart,purchase
11,2019-10-01,75972882,1,1,1
22,2019-10-01,128562720,1,1,1
23,2019-10-01,142717569,0,0,1
28,2019-10-01,151045021,1,1,1
37,2019-10-01,171204327,1,1,1
...,...,...,...,...,...
2048811,2020-02-29,622041514,1,1,1
2048838,2020-02-29,622042698,0,1,1
2048841,2020-02-29,622042844,1,1,1
2048907,2020-02-29,622046885,1,1,1


In [10]:
# 4. Save to CSV (index=False leaves the row index out of the file)
daily_funnel.to_csv('data/001_daily_user_funnel.csv', index=False)

- 퍼널 수정

In [3]:
import pandas as pd

# Parse event_time_moscow as datetime and derive each event's calendar date.
df['event_time_moscow'] = pd.to_datetime(df['event_time_moscow'])
df['event_date'] = df['event_time_moscow'].dt.date

daily_data = []

# Split the frame by day once with groupby (groups come back in ascending date order)
# instead of re-scanning the whole frame with a boolean mask for every date.
# Note: groupby skips rows whose event_date is missing.
for date, date_df in df.groupby('event_date'):
    # Rows of each event type for this day; each selection is reused for both
    # the session set and the row count below.
    day_event_type = date_df['event_type']
    view_rows = date_df[day_event_type == 'view']
    cart_rows = date_df[day_event_type == 'cart']
    purchase_rows = date_df[day_event_type == 'purchase']
    remove_rows = date_df[day_event_type == 'remove_from_cart']

    # Distinct sessions that produced each event type
    view_sessions = set(view_rows['user_session'])
    cart_sessions = set(cart_rows['user_session'])
    purchase_sessions = set(purchase_rows['user_session'])
    remove_sessions = set(remove_rows['user_session'])

    # Absolute counts: number of event rows (not distinct sessions)
    view_count = len(view_rows)
    cart_count = len(cart_rows)
    purchase_count = len(purchase_rows)

    # Session-level rates in percent; 0 when the denominator set is empty
    view_to_cart = (len(view_sessions & cart_sessions) / len(view_sessions) * 100) if view_sessions else 0
    cart_to_purchase = (len(cart_sessions & purchase_sessions) / len(cart_sessions) * 100) if cart_sessions else 0
    cart_to_remove = (len(cart_sessions & remove_sessions) / len(cart_sessions) * 100) if cart_sessions else 0

    # Bounce: sessions that viewed but neither added to cart nor purchased
    active_sessions = cart_sessions | purchase_sessions
    view_only_sessions = view_sessions - active_sessions
    page_bounce_rate = (len(view_only_sessions) / len(view_sessions) * 100) if view_sessions else 0

    # The same four day-level rates are attached to every row of the day
    rates = {
        'view_to_cart_rate(%)': round(view_to_cart, 2),
        'cart_to_purchase_rate(%)': round(cart_to_purchase, 2),
        'cart_to_remove_rate(%)': round(cart_to_remove, 2),
        'page_bounce_rate(%)': round(page_bounce_rate, 2),
    }

    # Long format: one row per (date, funnel stage)
    daily_data.extend(
        {'event_date': date, 'event_type': step, 'Count': count, **rates}
        for step, count in (
            ('view', view_count),
            ('cart', cart_count),
            ('purchase', purchase_count),
        )
    )

# Build the DataFrame from the collected records in one go
funnel_df = pd.DataFrame(daily_data)

funnel_df

Unnamed: 0,event_date,event_type,Count,view_to_cart_rate(%),cart_to_purchase_rate(%),cart_to_remove_rate(%),page_bounce_rate(%)
0,2019-10-01,view,34454,27.10,11.47,29.97,72.59
1,2019-10-01,cart,25314,27.10,11.47,29.97,72.59
2,2019-10-01,purchase,4877,27.10,11.47,29.97,72.59
3,2019-10-02,view,45689,51.36,4.49,12.40,48.33
4,2019-10-02,cart,52929,51.36,4.49,12.40,48.33
...,...,...,...,...,...,...,...
451,2020-02-28,cart,19725,16.07,14.51,37.01,83.60
452,2020-02-28,purchase,4604,16.07,14.51,37.01,83.60
453,2020-02-29,view,37092,15.47,13.70,38.18,84.22
454,2020-02-29,cart,20664,15.47,13.70,38.18,84.22


In [4]:
# Save the long-format daily funnel to CSV (index=False leaves the row index out of the file)
funnel_df.to_csv('data/003_daily_funnel_long.csv', index=False)

In [6]:
import pandas as pd

# Parse event_time_moscow as datetime and derive each event's calendar date.
df['event_time_moscow'] = pd.to_datetime(df['event_time_moscow'])
df['event_date'] = df['event_time_moscow'].dt.date

daily_data = []

# Split the frame by day once with groupby (groups come back in ascending date order)
# instead of re-scanning the whole frame with a boolean mask for every date.
# Note: groupby skips rows whose event_date is missing.
for date, date_df in df.groupby('event_date'):
    # Rows of each funnel stage for this day; reused for the session sets and the counts.
    day_event_type = date_df['event_type']
    view_rows = date_df[day_event_type == 'view']
    cart_rows = date_df[day_event_type == 'cart']
    purchase_rows = date_df[day_event_type == 'purchase']

    # Distinct sessions that produced each event type
    view_sessions = set(view_rows['user_session'])
    cart_sessions = set(cart_rows['user_session'])
    purchase_sessions = set(purchase_rows['user_session'])

    # Absolute counts: number of event rows (not distinct sessions)
    view_count = len(view_rows)
    cart_count = len(cart_rows)
    purchase_count = len(purchase_rows)

    # Session-level conversion rates as fractions in [0, 1] (not percent);
    # 0 when the denominator set is empty
    view_to_cart = (len(view_sessions & cart_sessions) / len(view_sessions)) if view_sessions else 0
    cart_to_purchase = (len(cart_sessions & purchase_sessions) / len(cart_sessions)) if cart_sessions else 0

    # Long format: each stage's row carries the conversion rate from the previous stage;
    # the first stage (view) is fixed at 1.0
    daily_data.extend([
        {'event_date': date, 'event_type': 'view', 'count': view_count, 'rate': 1.0},
        {'event_date': date, 'event_type': 'cart', 'count': cart_count, 'rate': round(view_to_cart, 4)},
        {'event_date': date, 'event_type': 'purchase', 'count': purchase_count, 'rate': round(cart_to_purchase, 4)},
    ])

# Build the DataFrame from the collected records in one go
funnel_df = pd.DataFrame(daily_data)

funnel_df

Unnamed: 0,event_date,event_type,count,rate
0,2019-10-01,view,34454,1.0000
1,2019-10-01,cart,25314,0.2710
2,2019-10-01,purchase,4877,0.1147
3,2019-10-02,view,45689,1.0000
4,2019-10-02,cart,52929,0.5136
...,...,...,...,...
451,2020-02-28,cart,19725,0.1607
452,2020-02-28,purchase,4604,0.1451
453,2020-02-29,view,37092,1.0000
454,2020-02-29,cart,20664,0.1547


In [13]:
# Save the long-format funnel (count + rate) to CSV; index=False leaves the row index out of the file
funnel_df.to_csv('data/004_daily_funnel_rate_long.csv', index=False)

In [14]:
import pandas as pd

# Parse event_time_moscow as datetime and derive each event's calendar date.
df['event_time_moscow'] = pd.to_datetime(df['event_time_moscow'])
df['event_date'] = df['event_time_moscow'].dt.date

daily_data = []

# Split the frame by day once with groupby (groups come back in ascending date order)
# instead of re-scanning the whole frame with a boolean mask for every date.
# Note: groupby skips rows whose event_date is missing.
for date, date_df in df.groupby('event_date'):
    day_event_type = date_df['event_type']

    # ---- distinct users per funnel stage ----
    view_users = set(date_df.loc[day_event_type == 'view', 'user_id'])
    cart_users = set(date_df.loc[day_event_type == 'cart', 'user_id'])
    purchase_users = set(date_df.loc[day_event_type == 'purchase', 'user_id'])

    # ---- counts are distinct users, not event rows ----
    view_count = len(view_users)
    cart_count = len(cart_users)
    purchase_count = len(purchase_users)

    # ---- conversion rates as fractions in [0, 1]; 0.0 when the denominator set is empty ----
    view_to_cart = (len(view_users & cart_users) / len(view_users)) if view_users else 0.0
    cart_to_purchase = (len(cart_users & purchase_users) / len(cart_users)) if cart_users else 0.0

    # ---- long format; dropoff = 1 - rate, computed from the unrounded rate ----
    # The first stage (view) is fixed at rate 1.0, so its dropoff is 0.0.
    daily_data.extend([
        {'event_date': date, 'event_type': 'view', 'count': view_count,
         'rate': 1.0, 'dropoff': 0.0},
        {'event_date': date, 'event_type': 'cart', 'count': cart_count,
         'rate': round(view_to_cart, 4), 'dropoff': round(1.0 - view_to_cart, 4)},
        {'event_date': date, 'event_type': 'purchase', 'count': purchase_count,
         'rate': round(cart_to_purchase, 4), 'dropoff': round(1.0 - cart_to_purchase, 4)},
    ])

# Build the DataFrame from the collected records in one go
funnel_df = pd.DataFrame(daily_data)

funnel_df

Unnamed: 0,event_date,event_type,count,rate,dropoff
0,2019-10-01,view,11611,1.0000,0.0000
1,2019-10-01,cart,4832,0.3521,0.6479
2,2019-10-01,purchase,854,0.1575,0.8425
3,2019-10-02,view,21160,1.0000,0.0000
4,2019-10-02,cart,13890,0.6171,0.3829
...,...,...,...,...,...
451,2020-02-28,cart,3575,0.2136,0.7864
452,2020-02-28,purchase,889,0.2218,0.7782
453,2020-02-29,view,13166,1.0000,0.0000
454,2020-02-29,cart,3343,0.1989,0.8011
