In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc

# Default plot theme
# https://coldbrown.co.kr/2023/07/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%8B%A4%EC%A0%84%ED%8E%B8-08-seaborn-sns-set%EC%9D%84-%ED%86%B5%ED%95%B4-%EC%8A%A4%ED%83%80%EC%9D%BC-%EC%84%A4%EC%A0%95%ED%95%98%EA%B8%B0/
sns.set()

# Global matplotlib defaults, applied after the seaborn theme call above.
plt.rcParams.update({
    'font.family': 'Malgun Gothic',   # alternative font option: 'AppleGothic'
    'figure.figsize': (12, 6),
    'font.size': 14,
    'axes.unicode_minus': False,
})


# 복잡한 통계 처리를 위한 라이브러리
from scipy import stats

In [4]:
# Load the merged event log that every later cell reads as `df`.
df = pd.read_csv(filepath_or_buffer='data/merged_data_brand_dropna.csv')

In [3]:
# Parse the event_time_moscow column into pandas datetimes.
df['event_time_moscow'] = df['event_time_moscow'].pipe(pd.to_datetime)

### user_session 기준 (한 사용자 행동 흐름 분석)

In [9]:
# Session-level funnel: conversion and drop-off rates.
# Collect, per event type, the set of distinct user_session values that contain it.
view_sessions = set(df.loc[df['event_type'] == 'view', 'user_session'])
cart_sessions = set(df.loc[df['event_type'] == 'cart', 'user_session'])
purchase_sessions = set(df.loc[df['event_type'] == 'purchase', 'user_session'])
remove_sessions = set(df.loc[df['event_type'] == 'remove_from_cart', 'user_session'])

# Conversion rates, kept as fractions (formatted as percentages only when printed).
view_to_cart = len(view_sessions.intersection(cart_sessions)) / len(view_sessions)
cart_to_purchase = len(cart_sessions.intersection(purchase_sessions)) / len(cart_sessions)
view_to_purchase = len(view_sessions.intersection(purchase_sessions)) / len(view_sessions)

# Drop-off rates, also fractions.
# Share of cart sessions that also contain a remove_from_cart event.
cart_abandonment_rate = len(cart_sessions.intersection(remove_sessions)) / len(cart_sessions)
# Cart sessions that never reach a purchase event.
only_cart_sessions = cart_sessions.difference(purchase_sessions)
cart_to_purchase_drop_rate = len(only_cart_sessions) / len(cart_sessions)

# Report
print(
    f"View → Cart 전환율: {view_to_cart:.2%}",
    f"Cart → Purchase 전환율: {cart_to_purchase:.2%}",
    f"View → Purchase 전환율: {view_to_purchase:.2%}",
    f"Cart → Remove 이탈률: {cart_abandonment_rate:.2%}",
    f"Cart → 구매 전환 실패율: {cart_to_purchase_drop_rate:.2%}",
    sep="\n",
)

View → Cart 전환율: 19.02%
Cart → Purchase 전환율: 14.19%
View → Purchase 전환율: 3.20%
Cart → Remove 이탈률: 37.75%
Cart → 구매 전환 실패율: 85.81%


In [20]:
# Conversion and drop-off rates (user-session based), collected into a one-row table.
#
# Two figures are derived here directly from the session sets instead of being
# read from notebook-level names:
#   * page_bounce_rate is otherwise first assigned in a cell further down, so a
#     top-to-bottom run (Restart & Run All) would hit a NameError in this cell.
#   * cart_abandonment_rate is rebound to a percentage by the event-count
#     section, so re-running this cell after that one would scale it twice.
# pandas is already imported in the first cell, so it is not re-imported here.

# Fraction of cart sessions that also contain a remove_from_cart event.
session_cart_remove_rate = len(cart_sessions & remove_sessions) / len(cart_sessions)

# Percentage of view sessions with neither a cart nor a purchase event.
page_bounce_rate = (
    len(view_sessions - (cart_sessions | purchase_sessions)) / len(view_sessions) * 100
)

user_based_result = pd.DataFrame([{
    'view_to_cart_rate (%)': round(view_to_cart * 100, 2),
    'cart_to_purchase_rate (%)': round(cart_to_purchase * 100, 2),
    'view_to_purchase_rate (%)': round(view_to_purchase * 100, 2),
    'cart_to_remove_rate (%)': round(session_cart_remove_rate * 100, 2),
    'cart_to_fail_purchase_rate (%)': round(cart_to_purchase_drop_rate * 100, 2),
    'page_bounce_rate (%)': round(page_bounce_rate, 2)  # already a percentage
}])

# Save, then display the table as the cell output.
user_based_result.to_csv('data/01_user_based_conversion.csv', index=False)
user_based_result

Unnamed: 0,view_to_cart_rate (%),cart_to_purchase_rate (%),view_to_purchase_rate (%),cart_to_remove_rate (%),cart_to_fail_purchase_rate (%),page_bounce_rate (%)
0,19.02,14.19,3.2,37.75,85.81,80.62


In [10]:
# Page-bounce sessions: sessions with a view event but no cart and no purchase event.

# Sessions that contain a cart or a purchase event.
active_sessions = cart_sessions.union(purchase_sessions)
view_only_sessions = view_sessions.difference(active_sessions)

# Page bounce rate, expressed as a percentage of all view sessions.
page_bounce_rate = len(view_only_sessions) / len(view_sessions) * 100

print(f"페이지 이탈률: {page_bounce_rate:.2f}%")

페이지 이탈률: 80.62%


### 전체 이벤트 수 기준

In [5]:
# Count the rows of each event type (boolean-mask sum, converted to a plain int).
view_count = int((df['event_type'] == 'view').sum())
cart_count = int((df['event_type'] == 'cart').sum())
purchase_count = int((df['event_type'] == 'purchase').sum())
remove_count = int((df['event_type'] == 'remove_from_cart').sum())

# Division helper that guards against a zero denominator.
def safe_divide(numerator, denominator):
    """Return ``numerator / denominator`` expressed as a percentage.

    Args:
        numerator: Value to divide.
        denominator: Value to divide by.

    Returns:
        ``numerator / denominator * 100`` when ``denominator`` is greater
        than zero; otherwise ``float('nan')`` (this includes zero and
        negative denominators).
    """
    if not denominator > 0:
        return float('nan')
    return numerator / denominator * 100

# Event-count based rates, all percentages produced by safe_divide:
#   add_to_cart_rate         - cart events relative to view events
#   purchase_conversion_rate - purchase events relative to view events
#   cart_abandonment_rate    - remove_from_cart events relative to cart events
# NOTE(review): cart_abandonment_rate is also assigned by the session-based
# section, where it holds a fraction; here it is rebound to a percentage.
add_to_cart_rate, purchase_conversion_rate, cart_abandonment_rate = (
    safe_divide(numer, denom)
    for numer, denom in (
        (cart_count, view_count),
        (purchase_count, view_count),
        (remove_count, cart_count),
    )
)

# Report
print(
    f"🛒 장바구니 추가율: {add_to_cart_rate:.2f}%",
    f"💳 구매 전환율: {purchase_conversion_rate:.2f}%",
    f"🚫 이탈률 (장바구니 → 제거): {cart_abandonment_rate:.2f}%",
    sep="\n",
)

🛒 장바구니 추가율: 57.56%
💳 구매 전환율: 12.97%
🚫 이탈률 (장바구니 → 제거): 68.38%


In [12]:
# Event-count based funnel rates (percentages, via safe_divide).
# Rates measured against the number of view events:
view_to_cart_rate = safe_divide(cart_count, view_count)          # View → Cart
view_to_purchase_rate = safe_divide(purchase_count, view_count)  # View → Purchase
# Rates measured against the number of cart events:
cart_to_purchase_rate = safe_divide(purchase_count, cart_count)  # Cart → Purchase
cart_to_remove_rate = safe_divide(remove_count, cart_count)      # Cart → Remove
# Cart events minus purchase events, relative to cart events.
cart_to_fail_purchase_rate = safe_divide(cart_count - purchase_count, cart_count)

# Report
print(
    f"📈 View → Cart 전환율: {view_to_cart_rate:.2f}%",
    f"💼 Cart → Purchase 전환율: {cart_to_purchase_rate:.2f}%",
    f"🛍️ View → Purchase 전환율: {view_to_purchase_rate:.2f}%",
    f"🚫 Cart → Remove 이탈률: {cart_to_remove_rate:.2f}%",
    f"❌ Cart → 구매 전환 실패율: {cart_to_fail_purchase_rate:.2f}%",
    sep="\n",
)

📈 View → Cart 전환율: 57.56%
💼 Cart → Purchase 전환율: 22.53%
🛍️ View → Purchase 전환율: 12.97%
🚫 Cart → Remove 이탈률: 68.38%
❌ Cart → 구매 전환 실패율: 77.47%


In [21]:
# Conversion and drop-off rates (event-count based), as a one-row table with
# each value rounded to two decimals.
event_based_result = pd.DataFrame([{
    column: round(value, 2)
    for column, value in (
        ('view_to_cart_rate (%)', view_to_cart_rate),
        ('cart_to_purchase_rate (%)', cart_to_purchase_rate),
        ('view_to_purchase_rate (%)', view_to_purchase_rate),
        ('cart_to_remove_rate (%)', cart_to_remove_rate),
        ('cart_to_fail_purchase_rate (%)', cart_to_fail_purchase_rate),
    )
}])

# Save, then display the table as the cell output.
# NOTE(review): this path has no 'data/' prefix, unlike the session-based
# table saved to 'data/01_user_based_conversion.csv' - confirm this is intended.
event_based_result.to_csv('02_event_based_conversion.csv', index=False)
event_based_result

Unnamed: 0,view_to_cart_rate (%),cart_to_purchase_rate (%),view_to_purchase_rate (%),cart_to_remove_rate (%),cart_to_fail_purchase_rate (%)
0,57.56,22.53,12.97,68.38,77.47


In [23]:
# Write one CSV per distinct event_type value.
event_types = df['event_type'].unique()

for event in event_types:
    # Rows of this event type only, as an independent copy.
    df_event = df.loc[df['event_type'] == event].copy()
    filename = f"data/event_type_{event}.csv"
    df_event.to_csv(filename, index=False)
    print(f"{filename} 저장 완료")

data/event_type_cart.csv 저장 완료
data/event_type_view.csv 저장 완료
data/event_type_remove_from_cart.csv 저장 완료
data/event_type_purchase.csv 저장 완료


In [13]:
# Total number of purchase events in the log.
purchase_count = int((df['event_type'] == 'purchase').sum())
print(f"✅ 전체 구매 완료 건수: {purchase_count:,}건")

✅ 전체 구매 완료 건수: 736,971건


In [22]:
# Number of completed purchases, wrapped in a one-row table.
purchase_result = pd.DataFrame({'purchase_count': [purchase_count]})

# Save, then display the table as the cell output.
# NOTE(review): written without the 'data/' prefix used by
# 'data/01_user_based_conversion.csv' - confirm this is intended.
purchase_result.to_csv('03_total_purchase_count.csv', index=False)
purchase_result

---