In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc

# Global plot styling for the notebook.
# Apply seaborn's default theme first: the theme call also sets font-related
# rcParams, so the overrides below are applied after it.
# Reference on sns.set styling:
# https://coldbrown.co.kr/2023/07/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%8B%A4%EC%A0%84%ED%8E%B8-08-seaborn-sns-set%EC%9D%84-%ED%86%B5%ED%95%B4-%EC%8A%A4%ED%83%80%EC%9D%BC-%EC%84%A4%EC%A0%95%ED%95%98%EA%B8%B0/
sns.set()

# Choose the Korean-capable font by operating system instead of toggling a
# commented-out line by hand: AppleGothic on macOS, Malgun Gothic otherwise
# (the previous hard-coded default).
import platform

korean_font = 'AppleGothic' if platform.system() == 'Darwin' else 'Malgun Gothic'

plt.rcParams.update({
    'font.family': korean_font,
    'figure.figsize': (12, 6),
    'font.size': 14,
    # Draw negative tick labels with an ASCII hyphen rather than the Unicode
    # minus sign (presumably because the Korean font lacks that glyph).
    'axes.unicode_minus': False,
})


# 복잡한 통계 처리를 위한 라이브러리
from scipy import stats

In [2]:
# Load the event data set that every later cell reads from `df`.
raw_csv_path = 'data/merged_data_brand_dropna.csv'
df = pd.read_csv(raw_csv_path)

In [3]:
# Convert the event timestamp column (read from CSV) into pandas datetimes.
df['event_time_moscow'] = df['event_time_moscow'].pipe(pd.to_datetime)

In [4]:
# Quick sanity check of the price column: range and dtype.
price_col = df['price']
print("최소 가격:", price_col.min())
print("최대 가격:", price_col.max())
print("price dtype:", price_col.dtype)

최소 가격: 0.06
최대 가격: 327.78
price dtype: float64


In [5]:
import pandas as pd
import numpy as np

# Session-level view -> purchase conversion rate per 40-unit price bin.

# Keep only rows with a strictly positive price; copy so `df` is not modified.
df_price = df[df['price'] > 0].copy()

# Bin edges in steps of 40 and one "lo-(hi-1)" label per adjacent pair of edges.
price_bins = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
bin_labels = [f"{lo}-{hi - 1}" for lo, hi in zip(price_bins[:-1], price_bins[1:])]

# NOTE(review): right=True makes every interval (lo, hi], so a price of exactly
# 40 is assigned to the bin labelled "0-39". Labels and intervals disagree at
# the edges; confirm which of the two is intended before relying on boundaries.
df_price['price_bin'] = pd.cut(df_price['price'], bins=price_bins, labels=bin_labels, right=True)

# Unique sessions per (price bin, event type), pivoted to one column per event
# type. observed=False is stated explicitly: `price_bin` is categorical, the
# implicit default is deprecated, and keeping it False retains empty bins as
# zero rows instead of dropping them when the pandas default changes.
conversion_by_bin = (
    df_price
    .groupby(['price_bin', 'event_type'], observed=False)['user_session']
    .nunique()
    .unstack(fill_value=0)
    .reset_index()
)

# Conversion rate = purchase sessions / view sessions. A bin with zero views
# yields inf or NaN, which is reported as 0. Only the rate column is cleaned,
# so the count and label columns are never touched by the replacement.
conversion_by_bin['conversion_rate'] = (
    conversion_by_bin['purchase'] / conversion_by_bin['view']
).replace([np.inf, np.nan], 0)

# Keep only the reporting columns, in display order.
conversion_by_bin = conversion_by_bin[['price_bin', 'view', 'purchase', 'conversion_rate']]

  conversion_by_bin = df_price.groupby(['price_bin', 'event_type'])['user_session'].nunique().unstack(fill_value=0).reset_index()


In [6]:
# Display the price-bin conversion table currently bound to `conversion_by_bin`.
conversion_by_bin

event_type,price_bin,view,purchase,conversion_rate
0,0-39,2524326,134904,0.053442
1,40-79,206562,5362,0.025958
2,80-119,63268,954,0.015079
3,120-159,53054,535,0.010084
4,160-199,25138,536,0.021322
5,200-239,8665,134,0.015465
6,240-279,4491,32,0.007125
7,280-319,178,1,0.005618
8,320-359,994,5,0.00503


In [23]:
# Write the price-bin conversion table to CSV without the row index.
# NOTE(review): this cell's recorded execution count (23) is later than the
# 30-unit cell (19), which rebinds `conversion_by_bin`. Confirm the file on
# disk really holds the 40-unit table, e.g. by rerunning top to bottom.
bin40_csv_path = "data/05_price_conversion_rate_bin_40.csv"
conversion_by_bin.to_csv(bin40_csv_path, index=False)

In [19]:
# Session-level view -> purchase conversion rate per 30-unit price bin.

# Keep only rows with a strictly positive price (the filter is `> 0`, not
# `>= 0`); copy so `df` is not modified.
df_price = df[df['price'] > 0].copy()

# Bin edges 0, 30, ..., 360 and one "lo-(hi-1)" label per adjacent pair of edges.
price_bins = list(range(0, 361, 30))
bin_labels = [f"{lo}-{hi - 1}" for lo, hi in zip(price_bins[:-1], price_bins[1:])]

# NOTE(review): right=True makes every interval (lo, hi], so a price of exactly
# 30 is assigned to the bin labelled "0-29". Labels and intervals disagree at
# the edges; confirm which of the two is intended before relying on boundaries.
df_price['price_bin'] = pd.cut(df_price['price'], bins=price_bins, labels=bin_labels, right=True)

# Unique sessions per (price bin, event type), pivoted to one column per event
# type. observed=False is stated explicitly: `price_bin` is categorical, the
# implicit default is deprecated, and keeping it False retains empty bins as
# zero rows instead of dropping them when the pandas default changes.
conversion_by_bin = (
    df_price
    .groupby(['price_bin', 'event_type'], observed=False)['user_session']
    .nunique()
    .unstack(fill_value=0)
    .reset_index()
)

# Conversion rate = purchase sessions / view sessions. A bin with zero views
# yields inf or NaN, which is reported as 0. Only the rate column is cleaned,
# so the count and label columns are never touched by the replacement.
conversion_by_bin['conversion_rate'] = (
    conversion_by_bin['purchase'] / conversion_by_bin['view']
).replace([np.inf, np.nan], 0)

# Keep only the reporting columns, in display order.
conversion_by_bin = conversion_by_bin[['price_bin', 'view', 'purchase', 'conversion_rate']]

  conversion_by_bin = df_price.groupby(['price_bin', 'event_type'])['user_session'].nunique().unstack(fill_value=0).reset_index()


In [20]:
# Display the price-bin conversion table currently bound to `conversion_by_bin`
# (the 30-unit table when this runs right after the cell above).
conversion_by_bin

event_type,price_bin,view,purchase,conversion_rate
0,0-29,2485305,134401,0.054078
1,30-59,197078,5341,0.027101
2,60-89,94922,2028,0.021365
3,90-119,41216,573,0.013902
4,120-149,46858,456,0.009732
5,150-179,11429,117,0.010237
6,180-209,27758,632,0.022768
7,210-239,1037,1,0.000964
8,240-269,180,4,0.022222
9,270-299,4384,28,0.006387


### 월별 · 가격대별 전환율

In [8]:
import pandas as pd
import numpy as np

# Session-level view -> purchase conversion rate per (month, 40-unit price bin).

# 1. Add a parsed timestamp and a 'YYYY-MM' month key to `df`, then keep rows
#    with a strictly positive price. Unparseable timestamps become NaT.
df['event_time'] = pd.to_datetime(df['event_time_moscow'], errors='coerce')
# NOTE(review): the recorded output echoes a warning on the original to_period
# line, presumably pandas' "will drop timezone information" warning for
# tz-aware data. Removing the timezone explicitly first keeps the same local
# wall-clock month and is a no-op on tz-naive data. Confirm the column is
# tz-aware.
df['event_month'] = (
    df['event_time'].dt.tz_localize(None).dt.to_period('M').astype(str)
)
df_price = df[df['price'] > 0].copy()

# 2. Bin edges in steps of 40 and one "lo-(hi-1)" label per adjacent pair.
price_bins = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
bin_labels = [f"{lo}-{hi - 1}" for lo, hi in zip(price_bins[:-1], price_bins[1:])]
# NOTE(review): right=True makes every interval (lo, hi], so a price of exactly
# 40 is assigned to the bin labelled "0-39"; confirm that this is intended.
df_price['price_bin'] = pd.cut(df_price['price'], bins=price_bins, labels=bin_labels, right=True)

# 3. Unique sessions per (month, price bin, event type), pivoted to one column
#    per event type. observed=False is stated explicitly: `price_bin` is
#    categorical, the implicit default is deprecated, and keeping it False
#    retains empty month/bin combinations as zero rows.
grouped = (
    df_price
    .groupby(['event_month', 'price_bin', 'event_type'], observed=False)['user_session']
    .nunique()
    .unstack(fill_value=0)
    .reset_index()
)

# 4. Conversion rate = purchase sessions / view sessions. Zero views yield inf
#    or NaN, which is reported as 0. Only the rate column is cleaned.
grouped['conversion_rate'] = (
    grouped['purchase'] / grouped['view']
).replace([np.inf, np.nan], 0)

# 5. Keep only the reporting columns and display the table.
result = grouped[['event_month', 'price_bin', 'view', 'purchase', 'conversion_rate']]
result

  df['event_month'] = df['event_time'].dt.to_period('M').astype(str)
  df_price.groupby(['event_month', 'price_bin', 'event_type'])['user_session']


event_type,event_month,price_bin,view,purchase,conversion_rate
0,2019-10,0-39,487163,25372,0.052081
1,2019-10,40-79,39683,1230,0.030996
2,2019-10,80-119,12564,198,0.015759
3,2019-10,120-159,10524,98,0.009312
4,2019-10,160-199,4486,82,0.018279
5,2019-10,200-239,2239,37,0.016525
6,2019-10,240-279,1120,7,0.00625
7,2019-10,280-319,19,0,0.0
8,2019-10,320-359,0,0,0.0
9,2019-11,0-39,509483,31997,0.062803


In [9]:
# Write the month x price-bin conversion table to CSV without the row index.
monthly_csv_path = 'data/05_conversion_rate_by_month_and_price_bin.csv'
result.to_csv(monthly_csv_path, index=False)