In [40]:
import pandas as pd
import matplotlib.pyplot as plt
from pyecharts.charts import Funnel
from pyecharts.globals import CurrentConfig, NotebookType
from pyecharts import options as opts
from pyecharts.charts import Line
from pyecharts.charts import Bar
# Keep wide DataFrames on a single line when printed in the notebook
# (disables pandas' wrapping of the frame repr across multiple blocks).
pd.options.display.expand_frame_repr = False

In [41]:
# Load the preprocessed user-action log into `df`; every later cell reads from it.
source_csv = '../user_action_after_preprocess.csv'
df = pd.read_csv(source_csv)
print('数据集数据框：\n', df)

数据集数据框：
             user_id    item_id  behavior_type  item_category           time   date  mon  day  hour
0          98047837  232431562              1           4245  2014-12-06 02  12-06   12    6     2
1          97726136  383583590              1           5894  2014-12-09 20  12-09   12    9    20
2          98607707   64749712              1           2883  2014-12-18 11  12-18   12   18    11
3          98662432  320593836              1           6562  2014-12-06 10  12-06   12    6    10
4          98145908  290208520              1          13926  2014-12-16 21  12-16   12   16    21
...             ...        ...            ...            ...            ...    ...  ...  ...   ...
12256901   93812622  378365755              1             11  2014-12-13 21  12-13   12   13    21
12256902   93812622  177724753              1          12311  2014-12-14 21  12-14   12   14    21
12256903   93812622  234391443              1           8765  2014-12-11 16  12-11   12   11    16
1

In [42]:
# Split the full log into one frame per behavior_type code.
behavior_codes = df['behavior_type']
browse_df = df[behavior_codes.eq(1)]  # browse events
collect_df = df[behavior_codes.eq(2)]  # collect (favourite) events
add_shopping_cart_df = df[behavior_codes.eq(3)]  # add-to-cart events
buy_df = df[behavior_codes.eq(4)]  # purchase events

# Preview the first rows of each subset. The first three previews are followed
# by a blank separator line; the final one is not.
for heading, subset in (
    ('浏览行为数据框：\n', browse_df),
    ('收藏行为数据框：\n', collect_df),
    ('加入购物车行为数据框：\n', add_shopping_cart_df),
):
    print(heading, subset.head(), '\n')
print('购买行为数据框：\n', buy_df.head())

浏览行为数据框：
     user_id    item_id  behavior_type  item_category           time   date  mon  day  hour
0  98047837  232431562              1           4245  2014-12-06 02  12-06   12    6     2
1  97726136  383583590              1           5894  2014-12-09 20  12-09   12    9    20
2  98607707   64749712              1           2883  2014-12-18 11  12-18   12   18    11
3  98662432  320593836              1           6562  2014-12-06 10  12-06   12    6    10
4  98145908  290208520              1          13926  2014-12-16 21  12-16   12   16    21 

收藏行为数据框：
        user_id    item_id  behavior_type  item_category           time   date  mon  day  hour
68   104811265  361649536              2          10523  2014-12-05 08  12-05   12    5     8
230  106230218  129138111              2           2993  2014-11-29 15  11-29   11   29    15
312  103802946   74587958              2           6516  2014-11-29 15  11-29   11   29    15
392  106557109  110337750              2           4296 

In [43]:
# Path 1: browse -> add to cart -> buy
# Pair every row of one stage with every row of the next stage that shares the
# same user, item and category (inner join, so unmatched rows are dropped).
shared_keys = ['user_id', 'item_id', 'item_category']
browse_to_cart_df = browse_df.merge(
    add_shopping_cart_df,
    how='inner',
    on=shared_keys,
    suffixes=('_browse', '_add_shopping_cart'),
)
cart_to_buy_df = add_shopping_cart_df.merge(
    buy_df,
    how='inner',
    on=shared_keys,
    suffixes=('_add_shopping_cart', '_buy'),
)

# Count distinct users with at least one pair where the later stage falls on a
# strictly later date than the earlier stage.
# NOTE(review): the comparison is on the day-level `date` column with `<`, so
# same-day conversions are not counted — confirm this is intended.
carted_after_browse = browse_to_cart_df['date_browse'] < browse_to_cart_df['date_add_shopping_cart']
bought_after_cart = cart_to_buy_df['date_add_shopping_cart'] < cart_to_buy_df['date_buy']
count_users_browse_to_cart = browse_to_cart_df.loc[carted_after_browse, 'user_id'].nunique()
count_users_cart_to_buy = cart_to_buy_df.loc[bought_after_cart, 'user_id'].nunique()

# Report the merged frames and the two user counts.
print('browse_to_cart_df: \n', browse_to_cart_df, '\n\n')
print('cart_to_buy_df: \n', cart_to_buy_df, '\n\n')
print('有点击→加入购物车用户数量: \n', count_users_browse_to_cart, '\n\n')
print('有加入购物车→购买的用户数量: \n', count_users_cart_to_buy)

browse_to_cart_df: 
           user_id    item_id  behavior_type_browse  item_category    time_browse date_browse  mon_browse  day_browse  hour_browse  behavior_type_add_shopping_cart time_add_shopping_cart date_add_shopping_cart  mon_add_shopping_cart  day_add_shopping_cart  hour_add_shopping_cart
0        93784494  337869048                     1           3979  2014-12-03 20       12-03          12           3           20                                3          2014-12-03 20                  12-03                     12                      3                      20
1        93784494  337869048                     1           3979  2014-12-03 20       12-03          12           3           20                                3          2014-12-04 10                  12-04                     12                      4                      10
2        93784494  337869048                     1           3979  2014-12-03 20       12-03          12           3           20             

In [44]:
# Path 2: browse -> collect -> buy
# Inner-join consecutive stages on user, item and category so that each merged
# row pairs one earlier-stage event with one later-stage event.
browse_to_collect_df = pd.merge(
    left=browse_df,
    right=collect_df,
    how='inner',
    on=['user_id', 'item_id', 'item_category'],
    suffixes=('_browse', '_collect'),
)
collect_to_buy = pd.merge(
    left=collect_df,
    right=buy_df,
    how='inner',
    on=['user_id', 'item_id', 'item_category'],
    suffixes=('_collect', '_buy'),
)

# Count distinct users with at least one pair where the later stage falls on a
# strictly later date than the earlier stage.
# NOTE(review): the comparison is on the day-level `date` column with `<`, so
# same-day conversions are not counted — confirm this is intended.
collected_after_browse = browse_to_collect_df['date_browse'] < browse_to_collect_df['date_collect']
bought_after_collect = collect_to_buy['date_collect'] < collect_to_buy['date_buy']
count_users_browse_to_collect = browse_to_collect_df.loc[collected_after_browse, 'user_id'].nunique()
count_users_collect_to_buy = collect_to_buy.loc[bought_after_collect, 'user_id'].nunique()

# Report the merged frames and the two user counts.
# The count labels previously said "add to cart" (copied from path 1) although
# the values are the collect-path counts; they now name the collect step.
print('browse_to_collect_df: \n', browse_to_collect_df, '\n\n')
print('collect_to_buy: \n', collect_to_buy, '\n\n')
print('有点击→收藏用户数量: \n', count_users_browse_to_collect, '\n\n')
print('有收藏→购买的用户数量: \n', count_users_collect_to_buy)

browse_to_collect_df: 
            user_id    item_id  behavior_type_browse  item_category    time_browse date_browse  mon_browse  day_browse  hour_browse  behavior_type_collect   time_collect date_collect  mon_collect  day_collect  hour_collect
0        104811265  322736792                     1           6513  2014-12-11 19       12-11          12          11           19                      2  2014-12-11 19        12-11           12           11            19
1        104811265  322736792                     1           6513  2014-12-11 19       12-11          12          11           19                      2  2014-12-11 19        12-11           12           11            19
2        104811265  322736792                     1           6513  2014-12-11 19       12-11          12          11           19                      2  2014-12-11 19        12-11           12           11            19
3        104811265  328893812                     1           5894  2014-12-05 11       

In [45]:
# Share of all distinct users in the dataset that reached each path-1 stage.
total_unique_users = df['user_id'].nunique()
print('数据集中用户的数量为：', total_unique_users)

# NOTE(review): the first ratio is built from the browse->cart count only, yet
# its name and printed label also mention "collect" — confirm which is intended.
collect_cart_ratio = count_users_browse_to_cart / total_unique_users
buy_ratio = count_users_cart_to_buy / total_unique_users

# Convert to percentages before formatting with two decimals.
collect_cart_percent = collect_cart_ratio * 100
buy_percent = buy_ratio * 100
print('收藏加购用户转化率为：%.2f%%' % collect_cart_percent)
print('购买用户转化率为：%.2f%%' % buy_percent)

数据集中用户的数量为： 10000
收藏加购用户转化率为：61.32%
购买用户转化率为：44.22%


In [47]:
# (label, value) pairs for the funnel chart, widest stage first.
# The top stage uses the count of all distinct users in the dataset.
process_data_pair = [
    ("点击量", total_unique_users),
    ("加入购物车量", count_users_browse_to_cart),
    ("购买量", count_users_cart_to_buy),
]

# Build the funnel step by step, then render it inline as the cell's output.
funnel_chart = Funnel(init_opts=opts.InitOpts(width='500px', height='400px'))
funnel_chart.add(
    "type",
    data_pair=process_data_pair,
    label_opts=opts.LabelOpts(position="top"),
    gap=2,
    tooltip_opts=opts.TooltipOpts(is_show=True),
)
funnel_chart.set_global_opts(
    title_opts=opts.TitleOpts(title="用户转化率", subtitle="过程 浏览->加入购物车->购买")
)
funnel_chart.render_notebook()