In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc

# Base plot theme
sns.set_style("white")
sns.set_context("notebook")

# Apply the bundled Korean font
import platform
from matplotlib import font_manager as fm
font_path = "font/NanumGothic.ttf"

# Register the TTF file with matplotlib's font manager before referring to it by name.
# FontProperties(fname=...).get_name() only reads the family name out of the file; without
# registration the name resolves only when the font is also installed system-wide, and
# matplotlib otherwise falls back to its default font (Korean glyphs render as boxes).
# (fontManager.addfont is available in matplotlib >= 3.2.)
fm.fontManager.addfont(font_path)
font_prop = fm.FontProperties(fname=font_path)
plt.rcParams['font.family'] = font_prop.get_name()

# Plot defaults
plt.rcParams['figure.figsize'] = 12, 6
plt.rcParams['font.size'] = 14
plt.rcParams['axes.unicode_minus'] = False    # draw the minus sign as ASCII '-' instead of U+2212
plt.rcParams['axes.facecolor'] = 'white'      # white axes background
plt.rcParams['figure.facecolor'] = 'white'    # white figure background
plt.rcParams['axes.grid'] = False             # no grid
plt.rcParams['savefig.facecolor'] = 'white'   # saved images get a white background too

# Silence all warnings for the rest of the session
import warnings
warnings.filterwarnings('ignore')

# 복잡한 통계 처리를 위한 라이브러리
from scipy import stats

In [3]:
# Load the event-level dataset that the cells below analyse
df = pd.read_csv(filepath_or_buffer='data/merged_data_brand_dropna.csv')

In [14]:
# Brands included in the funnel analysis
target_brands = ['eunyul', 'severina', 'supertan', 'elskin', 'cosima']

# Derive the calendar month of every event (this adds an `event_month` column to `df`
# itself), then keep only the rows of the target brands.
df['event_month'] = pd.to_datetime(df['event_time_moscow']).dt.to_period('M')
filtered = df[df['brand'].isin(target_brands)].copy()

# One row per (month, brand, user) for each funnel stage
view_df = filtered[filtered['event_type'] == 'view'][['event_month', 'brand', 'user_id']].drop_duplicates()
cart_df = filtered[filtered['event_type'] == 'cart'][['event_month', 'brand', 'user_id']].drop_duplicates()
purchase_df = filtered[filtered['event_type'] == 'purchase'][['event_month', 'brand', 'user_id']].drop_duplicates()

# Distinct users per month and brand at each stage
view_counts = view_df.groupby(['event_month', 'brand'])['user_id'].nunique().reset_index(name='view_users')
cart_counts = cart_df.groupby(['event_month', 'brand'])['user_id'].nunique().reset_index(name='cart_users')
purchase_counts = purchase_df.groupby(['event_month', 'brand'])['user_id'].nunique().reset_index(name='purchase_users')

# Join the later stages onto the view counts. Left joins keep only (month, brand) pairs that
# have at least one view; a stage with no users for such a pair comes back as NaN.
merged = view_counts.merge(cart_counts, on=['event_month', 'brand'], how='left') \
                    .merge(purchase_counts, on=['event_month', 'brand'], how='left')

# A missing stage means zero users. Cast back to int64 because any NaN introduced by the
# left joins would otherwise leave these count columns as floats.
merged = merged.fillna({'cart_users': 0, 'purchase_users': 0}) \
               .astype({'cart_users': 'int64', 'purchase_users': 'int64'})

# Conversion rates, rounded to 4 decimals.
# NOTE(review): each stage counts its users independently, so cart/purchase users are not
# required to have a view event for the same brand and month -- these ratios can exceed 1.
merged['view_to_cart_rate'] = (merged['cart_users'] / merged['view_users']).round(4)
# Zero cart users -> NaN rate instead of a division by zero. np.nan (rather than pd.NA) keeps
# the denominator float64, so the result stays numeric and .round(4) actually rounds it;
# pd.NA would upcast the column to object dtype.
merged['cart_to_purchase_rate'] = (merged['purchase_users'] / merged['cart_users'].replace(0, np.nan)).round(4)
merged['view_to_purchase_rate'] = (merged['purchase_users'] / merged['view_users']).round(4)

# Sort by brand, then month, for readability
merged = merged.sort_values(['brand', 'event_month']).reset_index(drop=True)

In [12]:
# Display order wanted for the brands
brand_order = ['eunyul', 'severina', 'supertan', 'elskin', 'cosima']

# Make `brand` an ordered categorical so that sorting follows brand_order
# rather than alphabetical order
merged['brand'] = merged['brand'].astype(
    pd.CategoricalDtype(categories=brand_order, ordered=True)
)

# Sort by brand (in the custom order), then by month, and renumber the index from 0
merged = merged.sort_values(by=['brand', 'event_month'], ignore_index=True)

In [13]:
# Show the monthly brand funnel table built above
merged

Unnamed: 0,event_month,brand,view_users,cart_users,purchase_users,view_to_cart_rate,cart_to_purchase_rate,view_to_purchase_rate
0,2019-10,eunyul,268,447,142,1.6679,0.3177,0.5299
1,2019-11,eunyul,259,431,146,1.6641,0.3387,0.5637
2,2019-12,eunyul,266,326,118,1.2256,0.362,0.4436
3,2020-01,eunyul,243,341,118,1.4033,0.346,0.4856
4,2020-02,eunyul,173,280,100,1.6185,0.3571,0.578
5,2019-10,severina,2828,3390,1416,1.1987,0.4177,0.5007
6,2019-11,severina,3178,3689,1759,1.1608,0.4768,0.5535
7,2019-12,severina,2539,2829,1284,1.1142,0.4539,0.5057
8,2020-01,severina,3511,3798,1746,1.0817,0.4597,0.4973
9,2020-02,severina,3200,3395,1424,1.0609,0.4194,0.445


In [15]:
# Persist the funnel table to CSV, leaving out the row index
merged.to_csv(path_or_buf='data/13_monthly_brand_funnel.csv', index=False)