# 가정
국가는 **아랍에미리트**, 이커머스 사이트는 **아마존**이라고 가정합니다.

- 참고링크1 : https://blog.naver.com/fedex_kr/222697326719
- 참고링크2 : https://blog.naver.com/dhzhdapxk/222279313175

아랍에미리트 전자상거래 시장의 성장률은 2019년 이후 상승하고 있으며,  
2017년 7월에는 아마존이 중동 최대 온라인 쇼핑몰인 수크닷컴(Souq.com)을 인수하였음.

# 라이브러리 로드

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
# Apply matplotlib's 'ggplot' style sheet to subsequent plots.
plt.style.use('ggplot')

# 데이터 로드

In [4]:
# Load the October 2019 event log. The local Windows folder is tried first;
# if the file cannot be opened there, fall back to the Google Drive mount
# (presumably the Colab environment).
try:
    path = 'C:/Users/User/Desktop/AIB_13/CP2/data/'
    df = pd.read_parquet(path + 'light_2019-Oct.parquet', engine='fastparquet')
except OSError:
    # Only a missing/unreadable file triggers the fallback. Other failures
    # (KeyboardInterrupt, a missing parquet engine, ...) are no longer
    # silently swallowed as they were with the bare ``except:``.
    path = '/content/drive/MyDrive/CP2/data/'
    df = pd.read_parquet(path + 'light_2019-Oct.parquet', engine='fastparquet')

df.head()

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,2019-10-01 00:00:00 UTC,view,44600062,-251657396,,shiseido,35.790001,541312140,72d76fde-8bb3-4e00-8c23-a032dfed738c
1,2019-10-01 00:00:00 UTC,view,3900821,-780140327,appliances.environment.water_heater,aqua,33.200001,554748717,9333dfbd-b87a-4708-9857-6336556b0fcc
2,2019-10-01 00:00:01 UTC,view,17200506,-1904213353,furniture.living_room.sofa,,543.099976,519107250,566511c2-e2e3-422b-b695-cf8e6e792ca8
3,2019-10-01 00:00:01 UTC,view,1307067,1518338663,computers.notebook,lenovo,251.740005,550050854,7c90fc70-0e80-4590-96f3-13c02c18c713
4,2019-10-01 00:00:04 UTC,view,1004237,-1769995873,electronics.smartphone,apple,1081.97998,535871217,c6bd7419-2748-4c56-95b4-8cec9ff8b80d


# 필요 데이터 전처리

In [5]:
# Convert the raw 'YYYY-MM-DD HH:MM:SS UTC' strings into naive datetimes.
# The vectorised .str slice drops the trailing ' UTC' (4 characters) without a
# per-row Python lambda, and pd.to_datetime with an explicit format replaces
# the unit-less astype('datetime64'), which pandas 2.x refuses to cast to.
df['event_time'] = pd.to_datetime(
    df['event_time'].str[:-4],
    format='%Y-%m-%d %H:%M:%S',
)
df['event_time'].dtype

dtype('<M8[ns]')

In [6]:
# Shift the UTC timestamps forward by four hours
# (presumably to UAE local time, matching the country assumed in this notebook).
local_offset = pd.Timedelta(hours=4)
df['event_time'] = df['event_time'] + local_offset
df['event_time'].tail(n=5)

42448759   2019-11-01 03:59:58
42448760   2019-11-01 03:59:58
42448761   2019-11-01 03:59:58
42448762   2019-11-01 03:59:59
42448763   2019-11-01 03:59:59
Name: event_time, dtype: datetime64[ns]

In [7]:
# Keep only the events whose shifted timestamp falls before 1 November 2019.
before_november = df['event_time'] < '2019-11-01'
df = df.loc[before_november]

In [8]:
# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session
0,2019-10-01 04:00:00,view,44600062,-251657396,,shiseido,35.790001,541312140,72d76fde-8bb3-4e00-8c23-a032dfed738c
1,2019-10-01 04:00:00,view,3900821,-780140327,appliances.environment.water_heater,aqua,33.200001,554748717,9333dfbd-b87a-4708-9857-6336556b0fcc
2,2019-10-01 04:00:01,view,17200506,-1904213353,furniture.living_room.sofa,,543.099976,519107250,566511c2-e2e3-422b-b695-cf8e6e792ca8
3,2019-10-01 04:00:01,view,1307067,1518338663,computers.notebook,lenovo,251.740005,550050854,7c90fc70-0e80-4590-96f3-13c02c18c713
4,2019-10-01 04:00:04,view,1004237,-1769995873,electronics.smartphone,apple,1081.97998,535871217,c6bd7419-2748-4c56-95b4-8cec9ff8b80d


# user_id를 기준으로  index 설정

In [9]:
# Move user_id out of the columns and into the row index.
user_df = df.set_index(keys='user_id', drop=True)
user_df.head(n=5)

Unnamed: 0_level_0,event_time,event_type,product_id,category_id,category_code,brand,price,user_session
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
541312140,2019-10-01 04:00:00,view,44600062,-251657396,,shiseido,35.790001,72d76fde-8bb3-4e00-8c23-a032dfed738c
554748717,2019-10-01 04:00:00,view,3900821,-780140327,appliances.environment.water_heater,aqua,33.200001,9333dfbd-b87a-4708-9857-6336556b0fcc
519107250,2019-10-01 04:00:01,view,17200506,-1904213353,furniture.living_room.sofa,,543.099976,566511c2-e2e3-422b-b695-cf8e6e792ca8
550050854,2019-10-01 04:00:01,view,1307067,1518338663,computers.notebook,lenovo,251.740005,7c90fc70-0e80-4590-96f3-13c02c18c713
535871217,2019-10-01 04:00:04,view,1004237,-1769995873,electronics.smartphone,apple,1081.97998,c6bd7419-2748-4c56-95b4-8cec9ff8b80d


In [10]:
# Order the rows by the user_id index (ascending) so that each user's
# events sit next to each other.
user_df = user_df.sort_index(axis=0, ascending=True)
user_df.head(n=5)

Unnamed: 0_level_0,event_time,event_type,product_id,category_id,category_code,brand,price,user_session
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
33869381,2019-10-24 00:04:08,view,7002639,-1350565191,kids.carriage,bumbleride,769.650024,d83dc524-8a2c-4780-bbe0-f8aee03b54df
64078358,2019-10-13 04:13:46,view,10600284,-142605577,,,0.0,6183edb9-f592-4bb8-9913-67bb4711694d
183503497,2019-10-03 01:43:00,view,22200103,1384120925,,,15.77,884233e8-8b9f-4970-808b-4e1c81f8a5fc
184265397,2019-10-15 21:19:28,view,27400002,1694499675,,oral-b,79.769997,c4ddb5b7-3185-453b-8e02-45d48a9775be
184265397,2019-10-04 21:50:50,view,6902303,-1384119625,furniture.living_room.chair,joie,111.459999,2c5d0468-32e1-42f7-9b4e-b17c1d387247


In [11]:
# Turn user_id back into a regular column and restore a 0..n-1 row index.
user_df = user_df.reset_index(drop=False)
user_df.head(n=5)

Unnamed: 0,user_id,event_time,event_type,product_id,category_id,category_code,brand,price,user_session
0,33869381,2019-10-24 00:04:08,view,7002639,-1350565191,kids.carriage,bumbleride,769.650024,d83dc524-8a2c-4780-bbe0-f8aee03b54df
1,64078358,2019-10-13 04:13:46,view,10600284,-142605577,,,0.0,6183edb9-f592-4bb8-9913-67bb4711694d
2,183503497,2019-10-03 01:43:00,view,22200103,1384120925,,,15.77,884233e8-8b9f-4970-808b-4e1c81f8a5fc
3,184265397,2019-10-15 21:19:28,view,27400002,1694499675,,oral-b,79.769997,c4ddb5b7-3185-453b-8e02-45d48a9775be
4,184265397,2019-10-04 21:50:50,view,6902303,-1384119625,furniture.living_room.chair,joie,111.459999,2c5d0468-32e1-42f7-9b4e-b17c1d387247


# 사용자 중 view만 존재하는 사용자 확인

In [47]:
# Distinct users that have at least one 'cart' event / one 'purchase' event.
is_cart_event = user_df['event_type'] == 'cart'
is_purchase_event = user_df['event_type'] == 'purchase'
cart_user_id = set(user_df.loc[is_cart_event, 'user_id'].unique())
purchase_user_id = set(user_df.loc[is_purchase_event, 'user_id'].unique())

# Users that appear in either of the two sets.
cart_or_purchase_users = cart_user_id | purchase_user_id

In [48]:
# Keep the rows of users that have no cart or purchase event at all.
has_cart_or_purchase = user_df['user_id'].isin(cart_or_purchase_users)
only_view_user_df = user_df.loc[~has_cart_or_purchase]
only_view_user_df.head(n=5)

Unnamed: 0,user_id,event_time,event_type,product_id,category_id,category_code,brand,price,user_session
0,33869381,2019-10-24 00:04:08,view,7002639,-1350565191,kids.carriage,bumbleride,769.650024,d83dc524-8a2c-4780-bbe0-f8aee03b54df
1,64078358,2019-10-13 04:13:46,view,10600284,-142605577,,,0.0,6183edb9-f592-4bb8-9913-67bb4711694d
2,183503497,2019-10-03 01:43:00,view,22200103,1384120925,,,15.77,884233e8-8b9f-4970-808b-4e1c81f8a5fc
3,184265397,2019-10-15 21:19:28,view,27400002,1694499675,,oral-b,79.769997,c4ddb5b7-3185-453b-8e02-45d48a9775be
4,184265397,2019-10-04 21:50:50,view,6902303,-1384119625,furniture.living_room.chair,joie,111.459999,2c5d0468-32e1-42f7-9b4e-b17c1d387247


In [49]:
# Percentage of distinct users that have no cart or purchase event.
view_only_user_count = only_view_user_df['user_id'].nunique()
total_user_count = user_df['user_id'].nunique()
(view_only_user_count / total_user_count) * 100

84.06265877161199

view만 존재하는 사용자는 전체 사용자의 약 84%입니다.  
이 사용자들을 장바구니 담기나 구매로 전환할 수 있다면 매출이 증가할 것입니다.

# 사용자 중 cart 혹은 purchase 이벤트가 하나 이상 존재하는 사용자

In [52]:
# Keep the rows of users that have at least one cart or purchase event.
in_cart_or_purchase = user_df['user_id'].isin(cart_or_purchase_users)
cart_or_purchase_user_df = user_df.loc[in_cart_or_purchase]

In [54]:
# Percentage of distinct users that have at least one cart or purchase event.
converting_user_count = cart_or_purchase_user_df['user_id'].nunique()
all_user_count = user_df['user_id'].nunique()
(converting_user_count / all_user_count) * 100

15.937341228388016